State Farm Distracted Drivers

Prev Exercises: Udacity:DeepLearning:TensorFlow:notMNIST

Fit Logistic Regression (TensorFlow)

In [2]:
import sys
print sys.version

from joblib import Parallel, delayed  
import multiprocessing
nCores = multiprocessing.cpu_count() - 2 # Allow other apps to run
print 'nCores: %d' % (nCores)

from datetime import datetime, time
print 'now: %s' % str(datetime.now())
2.7.11 (default, Jan 28 2016, 14:07:46) 
[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)]
nCores: 14
now: 2016-05-20 10:11:54.404326
In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display, Image
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.lib import grid
from rpy2.robjects.lib import ggplot2
import rpy2.robjects.pandas2ri

import numpy as np
np.set_printoptions(precision=4, suppress=True)
import os
import pandas as pd
from scipy import ndimage
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle

import tensorflow as tf
print 'tf.__version__:%s' % str(tf.__version__)

%run img_utils.py
tf.__version__:0.8.0
/usr/local/lib/python2.7/site-packages/rpy2/robjects/lib/ggplot2.py:59: UserWarning: This was designed againt ggplot2 version 2.0.0 but you have 2.1.0
  warnings.warn('This was designed againt ggplot2 version %s but you have %s' % (TARGET_VERSION, ggplot2.__version__))

Analytics Specs

This Project

In [4]:
%run img_glbSpec_SFDD_ImgSz_64.py
imported img_glbSpec_SFDD_Img_Sz_64.py
In [5]:
# print '\nglbDataFile: %s' % (glbDataFile)

print '\nglbRspClass: %s' % (glbRspClass)
print 'glbRspClassN: %d' % (glbRspClassN)
print 'glbRspClassDesc: '; print(glbRspClassDesc)

print '\nglbImg:'; print(glbImg)

print '\nglbTfwVarSeed: %d' % (glbTfwVarSeed)

print '\nglbPickleFile: %s' % (glbPickleFile)
glbRspClass: ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
glbRspClassN: 10
glbRspClassDesc: 
{'c9': 'talking to passenger', 'c8': 'hair and makeup', 'c3': 'texting - left', 'c2': 'talking on the phone - right', 'c1': 'texting - right', 'c0': 'normal driving', 'c7': 'reaching behind', 'c6': 'drinking', 'c5': 'operating the radio', 'c4': 'talking on the phone - left'}

glbImg:
{'color': False, 'crop': {'x': (80, 560)}, 'shape': (480, 640, 3), 'pxlDepth': 255.0, 'center_scale': True, 'size': 64}

glbTfwVarSeed: 131

glbPickleFile: {'models': 'data/img_M_SFDD_ImgSz_64.pickle', 'data': 'data/img_D_SFDD_ImgSz_64.pickle'}

Import Data

This Project

In [6]:
%run img_utils.py
glbObsFitIdn, glbObsFitFtr, glbObsFitRsp, \
glbObsVldIdn, glbObsVldFtr, glbObsVldRsp, \
glbObsNewIdn, glbObsNewFtr, glbObsNewRsp, \
sbtNewCorDf, \
_ = myimportDbs(glbPickleFile['data'])

glbObsTrnIdn = glbObsFitIdn + glbObsVldIdn
glbObsTrnFtr = np.vstack((glbObsFitFtr, glbObsVldFtr))
glbObsTrnRsp = np.concatenate((glbObsFitRsp, glbObsVldRsp))

print('Fit pickled set:', 
      len(glbObsFitIdn), glbObsFitFtr.shape, glbObsFitRsp.shape)
print('Vld pickled set:', 
      len(glbObsVldIdn), glbObsVldFtr.shape, glbObsVldRsp.shape)
print('Trn pickled set:', 
      len(glbObsTrnIdn), glbObsTrnFtr.shape, glbObsTrnRsp.shape)    
print('New pickled set:', 
      len(glbObsNewIdn), glbObsNewFtr.shape, glbObsNewRsp.shape)
Importing database from data/img_D_SFDD_ImgSz_64.pickle...
('Fit pickled set:', 18077, (18077, 64, 64), (18077,))
('Vld pickled set:', 4347, (4347, 64, 64), (4347,))
('Trn pickled set:', 22424, (22424, 64, 64), (22424,))
('New pickled set:', 79726, (79726, 64, 64), (79726,))

First reload the data we generated in 1_notmist.ipynb.

In [7]:
# pickle_file = 'data/notMNIST.pickle'

# with open(pickle_file, 'rb') as f:
#   save = pickle.load(f)
#   glbXFit = save['glbXFit']
#   glbYFit = save['glbYFit']
#   glbXVld = save['glbXVld']
#   glbYVld = save['glbYVld']
#   glbXNew = save['glbXNew']
#   glbYNew = save['glbYNew']
#   del save  # hint to help gc free up memory
#   print('Training set', glbXFit.shape, glbYFit.shape)
#   print('Validation set', glbXVld.shape, glbYVld.shape)
#   print('Test set', glbXNew.shape, glbYNew.shape)

Reformat into a shape that's more adapted to the models we're going to train:

  • data as a flat matrix,
  • labels as float 1-hot encodings.
In [7]:
def lclreformatData(I, X, Y):  
  X = X.reshape((-1, glbImg['size'] * glbImg['size'])).astype(np.float32)
  # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
  Y = (np.arange(glbRspClassN) == Y[:,None]).astype(np.float32)
  return I, X, Y

glbITrn, glbXTrn, glbYTrn = lclreformatData(
    glbObsTrnIdn, glbObsTrnFtr, glbObsTrnRsp)
glbIFit, glbXFit, glbYFit = lclreformatData(
    glbObsFitIdn, glbObsFitFtr, glbObsFitRsp)
glbIVld, glbXVld, glbYVld = lclreformatData(
    glbObsVldIdn, glbObsVldFtr, glbObsVldRsp)
glbINew, glbXNew, glbYNew = lclreformatData(
    glbObsNewIdn, glbObsNewFtr, glbObsNewRsp)

print('Trn reshaped set:', len(glbITrn), glbXTrn.shape, glbYTrn.shape)
print('Fit reshaped set:', len(glbIFit), glbXFit.shape, glbYFit.shape)
print('Vld reshaped set:', len(glbIVld), glbXVld.shape, glbYVld.shape)
print('New reshaped set:', len(glbINew), glbXNew.shape, glbYNew.shape)
('Trn reshaped set:', 22424, (22424, 4096), (22424, 10))
('Fit reshaped set:', 18077, (18077, 4096), (18077, 10))
('Vld reshaped set:', 4347, (4347, 4096), (4347, 10))
('New reshaped set:', 79726, (79726, 4096), (79726, 10))
In [8]:
# Check how much incremental memory is used for Trn obs
del glbObsTrnIdn, glbObsTrnFtr, glbObsTrnRsp
del glbITrn, glbXTrn, glbYTrn
In [9]:
print glbObsTrnFtr.shape
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-12c4c58b62cd> in <module>()
----> 1 print glbObsTrnFtr.shape

NameError: name 'glbObsTrnFtr' is not defined
In [9]:
# glbImg['size'] = 28
# glbRspClassN = 10

# def reformat(dataset, labels):
#   dataset = dataset.reshape((-1, glbImg['size'] * glbImg['size'])).astype(np.float32)
#   # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
#   labels = (np.arange(glbRspClassN) == labels[:,None]).astype(np.float32)
#   return dataset, labels
# glbXFit, glbYFit = reformat(glbXFit, glbYFit)
# glbXVld, glbYVld = reformat(glbXVld, glbYVld)
# glbXNew, glbYNew = reformat(glbXNew, glbYNew)
# print('Training set', glbXFit.shape, glbYFit.shape)
# print('Validation set', glbXVld.shape, glbYVld.shape)
# print('Test set', glbXNew.shape, glbYNew.shape)

Fit Logistic Regression (TensorFlow)

We're first going to train a multinomial logistic regression using simple gradient descent.

TensorFlow works like this:

  • First you describe the computation that you want to see performed: what the inputs, the variables, and the operations look like. These get created as nodes over a computation graph. This description is all contained within the block below:

    with graph.as_default():
        ...
  • Then you can run the operations on this graph as many times as you want by calling session.run(), providing it outputs to fetch from the graph that get returned. This runtime operation is all contained in the block below:

    with tf.Session(graph=graph) as session:
        ...

Let's load all the data into TensorFlow and build the computation graph corresponding to our training:

In [10]:
print "glbYNew[:5, :]:" % ()
print glbYNew[:5, :]
glbYNew[:5, :]:
[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]
In [10]:
%run img_glbSpec_SFDD_ImgSz_64.py
%run img_utils.py

# Add parameter for lrnRateTfw = 0.5 for GradientDescentOptimizer

# With gradient descent training, even this much data is prohibitive.
# Subset the training data for faster turnaround.
def fitMdlLgtRgrTfw(lclXFit, lclYFit, 
                    nObsFit = 50, nStepsTfw = 11, lrnRateTfw = 0.5,
                    visualize = False, newObs = False, verbose = False):
    
    from sklearn import metrics as skl_metrics
    
    print('\nLogistic Regression (TensorFlow): ' + \
          "nObsFit:%5d; nStepsTfw:%5d; lrnRateTfw:%.4f" % ( \
            nObsFit, nStepsTfw, lrnRateTfw))
    print("  visualize: %s; newObs: %s; verbose: %s" % ( \
            visualize, newObs, verbose))
    
    startTm = datetime.now()

    mdlDf = pd.DataFrame({'id': 'LgtRgr.tfw',
                             'nObsFit'  : [nObsFit],
                             'nStepsTfw': [nStepsTfw],
                            'lrnRateTfw': [lrnRateTfw]
                         })
    
    graph = tf.Graph()
    with graph.as_default():

      # Input data.
      # Load the training, validation and test data into constants that 
      #  are attached to the graph.
      tfwXFit = tf.constant(lclXFit[:nObsFit, :])
      tfwYFit = tf.constant(lclYFit[:nObsFit])
      tfwXVld = tf.constant(glbXVld)
      tfwYVld = tf.constant(glbYVld)   
      # Create placeholders for ObsNew & occlusion visualization
      tfwXNew = tf.placeholder(tf.float32, 
                shape = (glbImg['size'], lclXFit.shape[1]))
      tfwYNew = tf.placeholder(tf.float32, 
                shape = (glbImg['size'], lclYFit.shape[1]))

      # Variables.
      tf.set_random_seed(glbTfwVarSeed)
    
      # These are the parameters that we are going to be training. 
      # The weight matrix will be initialized using random valued 
      # following a (truncated) normal distribution. 
      # The bias vector get initialized to zero.
      tfwW = tf.Variable(
        tf.truncated_normal([glbImg['size'] * glbImg['size'], 
                             glbRspClassN]), 
        name = 'tfwW')
      tfwB = tf.Variable(tf.zeros([glbRspClassN]), name = 'tfwB')
      if (verbose):  
          print('  tfwW:', tfwW.initialized_value())
          print('  tfwB:', tfwB.initialized_value())
#         print 'lblIx:%2d:%s'% \
#     (np.vectorize("%.4e".__mod__)(tfwW.value()[:5, lblIx]))

      # Training computation.
      # We multiply the inputs with the weight matrix, and add bias. 
      # We compute the softmax and cross-entropy (it's one operation in
      # TensorFlow, because it's very common, and it can be optimized). 
      # We take the average of this cross-entropy across all training 
      # examples: that's our loss.
      logits = tf.matmul(tfwXFit, tfwW) + tfwB
      loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tfwYFit))

      # Optimizer.
      # We are going to find the minimum of this loss using 
      #  gradient descent.
      optimizer = (tf.train
                   .GradientDescentOptimizer(tf.to_float(lrnRateTfw))
                   .minimize(loss))

      # Predictions for the training, validation, and test data.
      # These are not part of training, but merely here so that we can
      # report accuracy_score figures as we train.
      tfwYFitPby = tf.nn.softmax(logits)
      tfwYVldPby = tf.nn.softmax(tf.matmul(tfwXVld, tfwW) + tfwB)
      tfwYNewPby = tf.nn.softmax(tf.matmul(tfwXNew, tfwW) + tfwB)
#       print '  fitMdlLgtRgrTfw: type(tfwYOccPby): %s' % \
#             (type(tfwYOccPby))  
#       print '  fitMdlLgtRgrTfw: ' + \
#             tfwYOccPby.get_shape().as_list(): %s' % \
#         (tfwYOccPby.get_shape().as_list())      
    
    def accuracy_score(predictions, labels):
      return (1.0 * np.sum(np.argmax(predictions, 1) == 
                           np.argmax(labels, 1))
              / predictions.shape[0])

    tf.set_random_seed(glbTfwVarSeed)
    with tf.Session(graph=graph) as session:
      # This is a one-time operation which ensures the parameters get
      # initialized as we described in the graph: 
      # random tfwW for the matrix, zeros for the tfwB. 
      tf.initialize_all_variables().run()
      if verbose:
          print('  Initialized')
        
      # Create a dummy feed for occlusion visualization
#       print "  fitMdlLgtRgrTfw: lclXFit.shape: %s" % \
#         str(lclXFit.shape)
#       print "  fitMdlLgtRgrTfw: " + \
#             "lclXFit[:glbImg['size'], :].shape: %s" % \
#         str(lclXFit[:glbImg['size'], :].shape)
      feed_dict = {tfwXNew: glbXNew[:glbImg['size'], :]}
      for step in range(int(nStepsTfw)):
        # Run the computations. 
        # We tell .run() that we want to run the optimizer,
        # and get the loss value and the training predictions returned
        # as numpy arrays.
        _, l, predictions = \
            session.run([optimizer, loss, tfwYFitPby], 
                        feed_dict = feed_dict)
            
        if mydspVerboseTrigger(step):
          thsDrn = (datetime.now() - startTm).seconds  
          if (thsDrn > 100):
              print('  logLoss at step %5d: %.4f (%d secs)' % \
                    (step, l, thsDrn))
        
      accFit = accuracy_score(tfwYFitPby.eval()[:nObsFit, :], 
                              tfwYFit.eval()   [:nObsFit, :])
      logLossFit = \
        skl_metrics.log_loss(tfwYFit.eval()   [:nObsFit, :], 
                             tfwYFitPby.eval()[:nObsFit, :])
      if verbose:
        print '\n  Fit accuracy:%0.4f' % (accFit)
        print   '  Fit  logLoss:%0.4f' % (logLossFit)        
    #         print metrics.confusion_matrix(glbYFit[0:nObsFit], 
    #                                         lclYFitPdn)        

      # Calling .eval() on tfwObsVldPred is basically like calling run(), 
      # but just to get that one numpy array. 
      # Note that it recomputes all its graph dependencies.

      lclYVldPby = tfwYVldPby.eval()
      lclYVldPdn = np.argmax(lclYVldPby, 1)
      mdlDf['accVld'] = accVld = accuracy_score(lclYVldPby, glbYVld)
      cnfYVld = skl_metrics.confusion_matrix(glbObsVldRsp, lclYVldPdn)
      accYVldCls = cnfYVld.diagonal() * 1.0 / cnfYVld.sum(axis = 1)
      mdlDf['accVldCls'] = None
      mdlDf.set_value(0, 'accVldCls', {'accCls' : accYVldCls})
    
      mdlDf['logLossVld'] = logLossVld = skl_metrics.log_loss(
                                                glbYVld, lclYVldPby)
      logLossVldCls = mygetMetricLogLoss(glbYVld, lclYVldPby, 
                                         returnTyp = 'class')
      mdlDf['logLossVldCls'] = None
      mdlDf.set_value(0, 'logLossVldCls', 
                      {'logLossCls' : logLossVldCls})

      if verbose:
        print '\n  Vld accuracy:%0.4f' % (accVld)
        print accYVldCls
        print cnfYVld
        yLbl = [glbRspClassDesc[glbRspClass[ix]] + ':' + \
                  glbRspClass[ix] + ':actl' \
                for ix in xrange(glbRspClassN)]
        xLbl = ['pred:' + glbRspClass[ix] + ':' + \
                glbRspClassDesc[glbRspClass[ix]] \
                   for ix in xrange(glbRspClassN)]
        # print labels
        plt.matshow(cnfYVld, cmap='Reds', interpolation='none')
        plt.yticks(np.arange(10), yLbl)
        plt.xticks(np.arange(10), xLbl, rotation=90);
        plt.show()

        print '\n  Vld  logLoss:%0.4f' % (logLossVld)
        print logLossVldCls

      if visualize:
        mydisplayImagePredictions(session, tfwW.eval(),
                glbIVld, glbObsVldFtr, glbObsVldRsp, lclYVldPby, 
                glbRspClass, glbRspClassDesc, imgVisualFn = None, 
                            tfwXOcc = tfwXNew, tfwYOccPby = tfwYNewPby)    
            
      if newObs:
          print "  predicting %5d new obs..." % (glbYNew.shape[0])  
          lclYNewPby = np.zeros((glbYNew.shape[0], 
                                 tfwYFitPby.get_shape().as_list()[1]))
          lclYNewPby[:, :] = -1.0
          btcSz = tfwXNew.get_shape().as_list()[0]  
          for obsIx in xrange(0, glbYNew.shape[0], btcSz):
            if mydspVerboseTrigger(obsIx) and \
              (datetime.now() - startTm).seconds > 60:
                print "    @%5d secs: obsIx: %5d" % \
                    ((datetime.now() - startTm).seconds, obsIx)    
            obsEnd = obsIx + btcSz
            if obsEnd > lclYNewPby.shape[0]: 
                obsEnd = lclYNewPby.shape[0]                
            btcYNewPby = session.run(tfwYNewPby, 
                feed_dict = {tfwXNew: glbXNew[obsIx:obsEnd, :] \
                    if obsEnd != lclYNewPby.shape[0] \
                    else np.vstack((glbXNew[obsIx:obsEnd, :], 
                            glbXNew[0:((obsIx + btcSz) % obsEnd), :]))
                                })
            lclYNewPby[obsIx:obsEnd, :] = btcYNewPby[:, :] \
                                if obsEnd != lclYNewPby.shape[0] \
                                else btcYNewPby[:(obsEnd - obsIx), :]    
          
          assert (lclYNewPby[:, :] != -1.0).all(), \
            'some cells in lclYNewPby == -1.0'
#           lclYNewPdn      = tfwYNew.eval()            
#           lclYNewPby = tfwYNewPby.eval()
          lclYNewPdn = np.argmax(lclYNewPby, 1) 
          #if (tfwYNew.eval() > -1).any():
          if (len(np.unique(glbYNew, return_counts = True)[0]) > 1):        
              mdlDf['accNew'] = accNew = accuracy_score(lclYNewPby, 
                                                        glbYNew)
              mdlDf['logLossNew'] = logLossNew = skl_metrics.log_loss(
                                                glbYNew, lclYNewPby)    
              if verbose:      
                print '\n  New accuracy:%0.4f' % (accNew)
                print   '  New  logLoss:%0.4f' % (logLossNew)        
                print skl_metrics.confusion_matrix(glbObsNewRsp, 
                                                   lclYNewPdn)
                yLbl = [glbRspClassDesc[glbRspClass[ix]] + ':' + 
                          glbRspClass[ix] + ':actl' \
                        for ix in xrange(glbRspClassN)]
                xLbl = ['pred:' + glbRspClass[ix] + ':' + \
                        glbRspClassDesc[glbRspClass[ix]] \
                           for ix in xrange(glbRspClassN)]
                # print labels
                plt.matshow(skl_metrics.confusion_matrix(glbObsNewRsp, 
                                                         lclYNewPdn), 
                            cmap='Reds', interpolation='none')
                plt.yticks(np.arange(10), yLbl)
                plt.xticks(np.arange(10), xLbl, rotation=90);
                plt.show()
                
          if visualize:      
              mydisplayImagePredictions(session, tfwW.eval(),
                glbINew, glbObsNewFtr, glbObsNewRsp, lclYNewPby, 
                glbRspClass, glbRspClassDesc, imgVisualFn = None, 
                        tfwXOcc = tfwXNew, tfwYOccPby = tfwYNewPby)    

          mdlDf['predNew'] = None
          mdlDf.set_value(0, 'predNew', {'kntCls' : np.unique(lclYNewPdn, 
                                            return_counts = True)})
          if verbose:
            print '\n  New prediction knts:'
            print mdlDf['predNew'][0]
            
      # indentation (6 spaces) determines scope of this
      #   before session.__exit__ & graph.__exit__        
      mdlDf['model'] = session 
    
    mdlDf['duration'] = (datetime.now() - startTm).seconds  
    print('  duration: %.2d seconds' % (mdlDf['duration'][0]))  
    
    if not newObs: lclYNewPby = None
    return(mdlDf, lclYVldPby, lclYNewPby)

tmpMdlDf = pd.DataFrame()

# thsMdlDf, thsYVldPby, thsYNewPby = fitMdlLgtRgrTfw(
#     glbXFit, glbYFit, 
#     nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5,
#     visualize = True, newObs = True, verbose = True)
# mdlDf = mdlDf.append(thsMdlDf)

# To check if model results are deterministic & 
# all run options work separately
# thsMdlDf, thsYVldPby, thsYNewPby = fitMdlLgtRgrTfw(
#     glbXFit, glbYFit, 
#     nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5, 
#     visualize = True, newObs = False, verbose = False)
# mdlDf = mdlDf.append(thsMdlDf)

thsMdlDf, thsYVldPby, thsYNewPby = fitMdlLgtRgrTfw(
    glbXFit, glbYFit, 
    nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5, 
    visualize = False, newObs = True, verbose = False)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)

thsMdlDf, thsYVldPby, thsYNewPby = fitMdlLgtRgrTfw(
    glbXFit, glbYFit, 
    nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5, 
    visualize = False, newObs = False, verbose = True)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)

thsMdlDf, thsYVldPby, thsYNewPby = fitMdlLgtRgrTfw(
    glbXFit, glbYFit, 
    nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5,
    visualize = False, newObs = False, verbose = False)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)

# thsMdlDf, thsYVldPby, thsYNewPby = fitMdlLgtRgrTfw(
#     glbXFit, glbYFit, 
#     nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.1, 
#     visualize = False, newObs = False, verbose = True)
# tmpMdlDf = tmpMdlDf.append(thsMdlDf)

print '\ntmpMdlDf: '
print(tmpMdlDf)
imported img_glbSpec_SFDD_Img_Sz_64.py

Logistic Regression (TensorFlow): nObsFit:  100; nStepsTfw:   10; lrnRateTfw:0.5000
  visualize: False; newObs: True; verbose: False
  predicting 79726 new obs...
  duration: 14 seconds

Logistic Regression (TensorFlow): nObsFit:  100; nStepsTfw:   10; lrnRateTfw:0.5000
  visualize: False; newObs: False; verbose: True
('  tfwW:', <tf.Tensor 'Identity:0' shape=(4096, 10) dtype=float32>)
('  tfwB:', <tf.Tensor 'Identity_1:0' shape=(10,) dtype=float32>)
  Initialized

  Fit accuracy:0.2400
  Fit  logLoss:11.9237

  Vld accuracy:0.1449
[ 0.3409  0.1067  0.      0.8104  0.0065  0.0047  0.      0.0114  0.
  0.1018]
[[165  23   0 226  51   5   0   4   0  10]
 [ 49  51   0 377   1   0   0   0   0   0]
 [127  64   0 204  56   0   0   1   0   0]
 [ 30  10   0 359   6   0   0   0   0  38]
 [ 28  20   1 378   3   0   0   0   0  29]
 [ 23  20   0 283  84   2   0   9   0   1]
 [ 78  33   0 310  20   0   0   0   0   2]
 [ 41  29   0 216  51   3   0   4   0   6]
 [ 96   8   0 242  14   2   0   1   0   1]
 [ 91  48   0 217  17   3   1  29   0  46]]
  Vld  logLoss:21.0945
[ 0.7392  2.9072  2.6943  0.1048  2.0342  2.5351  3.4814  1.6954  2.8434
  2.0595]
  duration: 01 seconds

Logistic Regression (TensorFlow): nObsFit:  100; nStepsTfw:   10; lrnRateTfw:0.5000
  visualize: False; newObs: False; verbose: False
  duration: 01 seconds

tmpMdlDf: 
     accVld                                          accVldCls  duration  \
0  0.144928  {u'accCls': [0.340909090909, 0.106694560669, 0...        14   
0  0.144928  {u'accCls': [0.340909090909, 0.106694560669, 0...         1   
0  0.144928  {u'accCls': [0.340909090909, 0.106694560669, 0...         1   

           id  logLossVld                                      logLossVldCls  \
0  LgtRgr.tfw    21.09451  {u'logLossCls': [0.739175352442, 2.90720444303...   
0  LgtRgr.tfw    21.09451  {u'logLossCls': [0.739175352442, 2.90720444303...   
0  LgtRgr.tfw    21.09451  {u'logLossCls': [0.739175352442, 2.90720444303...   

   lrnRateTfw                                              model  nObsFit  \
0         0.5  <tensorflow.python.client.session.Session obje...      100   
0         0.5  <tensorflow.python.client.session.Session obje...      100   
0         0.5  <tensorflow.python.client.session.Session obje...      100   

   nStepsTfw                                            predNew  
0         10  {u'kntCls': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
0         10                                                NaN  
0         10                                                NaN  
In [30]:
lr1MdlDf, lr1YVldPby, lr1YNewPby = fitMdlLgtRgrTfw(
    glbXFit, glbYFit, 
    nObsFit = 1000, nStepsTfw = 100, lrnRateTfw = 0.1, 
    visualize = False, newObs = True, verbose = True)
Logistic Regression (TensorFlow): nObsFit: 1000; nStepsTfw:  100; lrnRateTfw:0.1000
  visualize: False; newObs: True; verbose: True
('  tfwW:', <tf.Tensor 'Identity:0' shape=(4096, 10) dtype=float32>)
('  tfwB:', <tf.Tensor 'Identity_1:0' shape=(10,) dtype=float32>)
  Initialized

  Fit accuracy:0.2470
  Fit  logLoss:7.2014

  Vld accuracy:0.1735
  Vld  logLoss:12.6001
[[ 73  22  44  15  77   7  94  50  18  84]
 [ 33  68  51  31  24  22  90  77  10  72]
 [ 14 100  61   9 115   1  28 113   2   9]
 [  2  18  29 136  34   7  37  19  10 151]
 [  5  14  17 137  67   9  58  91   3  58]
 [ 11  16  19  41 114  13  14 151  30  13]
 [  9  47  31  55 113   9  45  96  19  19]
 [  1  16  10  14  57  28  45 130  24  25]
 [ 21  12  47  36  47  16  50  66   8  61]
 [  2  12  15   8  38   9  56 140  19 153]]
  predicting 79726 new obs...

  New prediction knts:
{'clsKnt': (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([ 8786, 11144, 10374,  6453,  5874,  4904,  9130,  6548,  7613,  8900]))}
  duration: 19 seconds
In [31]:
lr5MdlDf, lr5YVldPby, lr5YNewPby = fitMdlLgtRgrTfw(
    glbXFit, glbYFit, 
    nObsFit = 1000, nStepsTfw = 100, lrnRateTfw = 0.5, 
    visualize = False, newObs = True, verbose = True)
Logistic Regression (TensorFlow): nObsFit: 1000; nStepsTfw:  100; lrnRateTfw:0.5000
  visualize: False; newObs: True; verbose: True
('  tfwW:', <tf.Tensor 'Identity:0' shape=(4096, 10) dtype=float32>)
('  tfwB:', <tf.Tensor 'Identity_1:0' shape=(10,) dtype=float32>)
  Initialized

  Fit accuracy:0.4970
  Fit  logLoss:5.6870

  Vld accuracy:0.2227
  Vld  logLoss:13.3804
[[148  76  77   0  65   0   4  41   4  69]
 [ 58  87 100   0  44   4   2 121   9  53]
 [ 11  63 120   0 110   0   0 121   0  27]
 [ 10  36  83   1 132   3   3  31   0 144]
 [ 11  19  86   0 197   5  17  53   0  71]
 [ 34   7  66   0  79  35   1 121  38  41]
 [ 33  35 135   0 134   2   1  73   5  25]
 [  2  15  39   0  46  10   0 191  22  25]
 [ 30  24 105   0  40   6   0  62  32  65]
 [ 13  12  87   0  47   6   0 128   3 156]]
  predicting 79726 new obs...

  New prediction knts:
{'clsKnt': (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([12150, 13070, 22283,   135,  4653,  4845,   390,  7545,  5352,  9303]))}
  duration: 19 seconds
In [108]:
%run img_utils.py
from sklearn import metrics as skl_metrics    

def lclaccuracy_score(predictions, labels):
      return (1.0 * np.sum(np.argmax(predictions, 1) == 
                           np.argmax(labels, 1))
              / predictions.shape[0])
    
print "\nlrnRateTfw: 0.1; accVld: %0.4f" % \
    (lclaccuracy_score(lr1YVldPby, glbYVld))
cnf1YVld = skl_metrics.confusion_matrix(glbObsVldRsp, 
                                        np.argmax(lr1YVldPby, 1))
print cnf1YVld
# print cnf1YVld.sum(axis = 1)
print cnf1YVld.diagonal() * 1.0 / cnf1YVld.sum(axis = 1)
# print cnf1YVld.sum(axis = 1).sum()

lls1YVld = skl_metrics.log_loss(glbYVld, lr1YVldPby)
print "\nlrnRateTfw: 0.1; logLossVld: %0.4f" % (lls1YVld)
# lls1YVldCls = np.array([skl_metrics.log_loss(
#                glbYVld[glbYVld[:, clsIx] == 1, :], 
#             lr1YVldPby[glbYVld[:, clsIx] == 1, :]) \
#                      for clsIx in range(glbRspClassN)]) / glbRspClassN
# print "lrnRateTfw: 0.1; logLossVld.skl classSum: %0.4f" % (lls1YVld.sum())
# print lls1YVld
lls1YVldCls = mygetMetricLogLoss(glbYVld, lr1YVldPby, returnTyp = 'class')
print "lrnRateTfw: 0.1; logLossVldCls :"; print lls1YVldCls
# print "diff: %.4e" % (np.abs(lls1YVld - lls1YVldCls.sum()))
assert np.abs(lls1YVld - lls1YVldCls.sum()) < 1e-04, \
    "logLoss from skl: %.4f vs myCls %.4f does not match" % \
    (lls1YVld, lls1YVldCls.sum())
    
print "\nlrnRateTfw: 0.5; accVld: %0.4f" % \
    (lclaccuracy_score(lr5YVldPby, glbYVld))
cnf5YVld = skl_metrics.confusion_matrix(glbObsVldRsp, 
                                        np.argmax(lr5YVldPby, 1))
print cnf5YVld
print cnf5YVld.diagonal() * 1.0 / cnf5YVld.sum(axis = 1)

lls5YVld = skl_metrics.log_loss(glbYVld, lr5YVldPby)
print "\nlrnRateTfw: 0.5; logLossVld: %0.4f" % (lls5YVld)
lls5YVldCls = mygetMetricLogLoss(glbYVld, lr5YVldPby, returnTyp = 'class')
print "lrnRateTfw: 0.5; logLossVldCls :"; print lls5YVldCls
assert np.abs(lls5YVld - lls5YVldCls.sum()) < 1e-04, \
    "logLoss from skl: %.4f vs myCls %.4f does not match" % \
    (lls5YVld, lls5YVldCls.sum())
lrnRateTfw: 0.1; accVld: 0.1735
[[ 73  22  44  15  77   7  94  50  18  84]
 [ 33  68  51  31  24  22  90  77  10  72]
 [ 14 100  61   9 115   1  28 113   2   9]
 [  2  18  29 136  34   7  37  19  10 151]
 [  5  14  17 137  67   9  58  91   3  58]
 [ 11  16  19  41 114  13  14 151  30  13]
 [  9  47  31  55 113   9  45  96  19  19]
 [  1  16  10  14  57  28  45 130  24  25]
 [ 21  12  47  36  47  16  50  66   8  61]
 [  2  12  15   8  38   9  56 140  19 153]]
[ 0.1508  0.1423  0.135   0.307   0.146   0.0308  0.1016  0.3714  0.022
  0.3385]

lrnRateTfw: 0.1; logLossVld: 12.6001
lrnRateTfw: 0.1; logLossVldCls :
[ 1.4741  2.0839  1.4873  0.8025  1.0307  1.8989  1.0157  0.5303  1.2532
  1.0236]

lrnRateTfw: 0.5; accVld: 0.2227
[[148  76  77   0  65   0   4  41   4  69]
 [ 58  87 100   0  44   4   2 121   9  53]
 [ 11  63 120   0 110   0   0 121   0  27]
 [ 10  36  83   1 132   3   3  31   0 144]
 [ 11  19  86   0 197   5  17  53   0  71]
 [ 34   7  66   0  79  35   1 121  38  41]
 [ 33  35 135   0 134   2   1  73   5  25]
 [  2  15  39   0  46  10   0 191  22  25]
 [ 30  24 105   0  40   6   0  62  32  65]
 [ 13  12  87   0  47   6   0 128   3 156]]
[ 0.3058  0.182   0.2655  0.0023  0.4292  0.0829  0.0023  0.5457  0.0879
  0.3451]

lrnRateTfw: 0.5; logLossVld: 13.3804
lrnRateTfw: 0.5; logLossVldCls :
[ 1.0024  1.5107  0.8492  2.5585  0.8238  1.6644  2.5152  0.3775  1.327
  0.7518]
In [12]:
# glbMdlDf = None
# glbMdlDf = pd.DataFrame()

try:
    with open(glbPickleFile['models'], 'rb') as f:
        glbMdlDf = pickle.load(f)
        assert isinstance(glbMdlDf, pd.DataFrame), \
            'type(glbMdlDf): %s, expecting pd.DataFrame' % \
            (str(type(glbMdlDf)))            
except IOError, e:
    print e
    print 'file %s not present or not appropriate' % \
        (glbPickleFile['models'])        
print glbMdlDf         
                                           accVld  \
id         nStepsTfw nObsFit lrnRateTfw             
LgtRgr.skl -1.0      22424.0 -1.0        1.000000   
LgtRgr.tfw  1000.0   18077.0  10.0       0.375201   
LgtRgr.skl -1.0      18077.0 -1.0        0.343685   
LgtRgr.tfw  1000.0   18077.0  1.0        0.320221   
LgtRgr.skl -1.0      15000.0 -1.0        0.342535   
LgtRgr.tfw  1000.0   10000.0  10.0       0.384173   
LgtRgr.skl -1.0      10000.0 -1.0        0.358638   
LgtRgr.tfw  1000.0   10000.0  1.0        0.319991   
                              0.1        0.251208   
LgtRgr.skl -1.0      5000.0  -1.0        0.363699   
                     2000.0  -1.0        0.341385   
                     1000.0  -1.0        0.333333   
LgtRgr.tfw  100.0    1000.0   10.0       0.273522   
                              1.0        0.255809   
                              0.1        0.173453   
LgtRgr.skl -1.0      100.0   -1.0        0.307108   

                                                                                 accVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  10.0       {u'accCls': [0.504132231405, 0.435146443515, 0...   
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  1.0        {u'accCls': [0.326446280992, 0.209205020921, 0...   
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  10.0       {u'accCls': [0.456611570248, 0.380753138075, 0...   
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  1.0        {u'accCls': [0.289256198347, 0.182008368201, 0...   
                              0.1        {u'accCls': [0.169421487603, 0.144351464435, 0...   
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN   
                     2000.0  -1.0                                                      NaN   
                     1000.0  -1.0                                                      NaN   
LgtRgr.tfw  100.0    1000.0   10.0       {u'accCls': [0.646694214876, 0.0564853556485, ...   
                              1.0        {u'accCls': [0.423553719008, 0.182008368201, 0...   
                              0.1        {u'accCls': [0.150826446281, 0.142259414226, 0...   
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN   

                                        bestFit  duration          id  \
id         nStepsTfw nObsFit lrnRateTfw                                 
LgtRgr.skl -1.0      22424.0 -1.0           NaN       874  LgtRgr.skl   
LgtRgr.tfw  1000.0   18077.0  10.0          NaN       519  LgtRgr.tfw   
LgtRgr.skl -1.0      18077.0 -1.0          True       597  LgtRgr.skl   
LgtRgr.tfw  1000.0   18077.0  1.0           NaN       519  LgtRgr.tfw   
LgtRgr.skl -1.0      15000.0 -1.0         False       514  LgtRgr.skl   
LgtRgr.tfw  1000.0   10000.0  10.0          NaN       288  LgtRgr.tfw   
LgtRgr.skl -1.0      10000.0 -1.0         False       317  LgtRgr.skl   
LgtRgr.tfw  1000.0   10000.0  1.0           NaN       301  LgtRgr.tfw   
                              0.1           NaN       295  LgtRgr.tfw   
LgtRgr.skl -1.0      5000.0  -1.0         False       150  LgtRgr.skl   
                     2000.0  -1.0         False        55  LgtRgr.skl   
                     1000.0  -1.0         False        28  LgtRgr.skl   
LgtRgr.tfw  100.0    1000.0   10.0          NaN         7  LgtRgr.tfw   
                              1.0           NaN         5  LgtRgr.tfw   
                              0.1           NaN         5  LgtRgr.tfw   
LgtRgr.skl -1.0      100.0   -1.0         False         8  LgtRgr.skl   

                                         logLossVld  \
id         nStepsTfw nObsFit lrnRateTfw               
LgtRgr.skl -1.0      22424.0 -1.0          0.018463   
LgtRgr.tfw  1000.0   18077.0  10.0        19.209913   
LgtRgr.skl -1.0      18077.0 -1.0          3.116799   
LgtRgr.tfw  1000.0   18077.0  1.0          9.475025   
LgtRgr.skl -1.0      15000.0 -1.0          3.085191   
LgtRgr.tfw  1000.0   10000.0  10.0        18.995890   
LgtRgr.skl -1.0      10000.0 -1.0          2.885114   
LgtRgr.tfw  1000.0   10000.0  1.0          9.502959   
                              0.1          8.912652   
LgtRgr.skl -1.0      5000.0  -1.0          2.610439   
                     2000.0  -1.0          2.498749   
                     1000.0  -1.0          2.539650   
LgtRgr.tfw  100.0    1000.0   10.0        24.274683   
                              1.0         14.714048   
                              0.1         12.600104   
LgtRgr.skl -1.0      100.0   -1.0          2.497940   

                                                                             logLossVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  10.0       {u'logLossCls': [1.72596805748, 1.7853875361, ...   
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  1.0        {u'logLossCls': [1.05645327144, 1.44372530176,...   
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  10.0       {u'logLossCls': [1.84540699409, 1.93896956479,...   
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  1.0        {u'logLossCls': [1.14059053163, 1.46723165909,...   
                              0.1        {u'logLossCls': [1.11536194976, 1.25718941188,...   
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN   
                     2000.0  -1.0                                                      NaN   
                     1000.0  -1.0                                                      NaN   
LgtRgr.tfw  100.0    1000.0   10.0       {u'logLossCls': [1.28781316316, 3.51519487621,...   
                              1.0        {u'logLossCls': [0.842354858019, 1.17531354279...   
                              0.1        {u'logLossCls': [1.47411393905, 2.08387227918,...   
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN   

                                         lrnRateTfw  nObsFit  nStepsTfw  \
id         nStepsTfw nObsFit lrnRateTfw                                   
LgtRgr.skl -1.0      22424.0 -1.0              -1.0  22424.0       -1.0   
LgtRgr.tfw  1000.0   18077.0  10.0             10.0  18077.0     1000.0   
LgtRgr.skl -1.0      18077.0 -1.0              -1.0  18077.0       -1.0   
LgtRgr.tfw  1000.0   18077.0  1.0               1.0  18077.0     1000.0   
LgtRgr.skl -1.0      15000.0 -1.0              -1.0  15000.0       -1.0   
LgtRgr.tfw  1000.0   10000.0  10.0             10.0  10000.0     1000.0   
LgtRgr.skl -1.0      10000.0 -1.0              -1.0  10000.0       -1.0   
LgtRgr.tfw  1000.0   10000.0  1.0               1.0  10000.0     1000.0   
                              0.1               0.1  10000.0     1000.0   
LgtRgr.skl -1.0      5000.0  -1.0              -1.0   5000.0       -1.0   
                     2000.0  -1.0              -1.0   2000.0       -1.0   
                     1000.0  -1.0              -1.0   1000.0       -1.0   
LgtRgr.tfw  100.0    1000.0   10.0             10.0   1000.0      100.0   
                              1.0               1.0   1000.0      100.0   
                              0.1               0.1   1000.0      100.0   
LgtRgr.skl -1.0      100.0   -1.0              -1.0    100.0       -1.0   

                                                                                   predNew  
id         nStepsTfw nObsFit lrnRateTfw                                                     
LgtRgr.skl -1.0      22424.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
LgtRgr.tfw  1000.0   18077.0  10.0                                                     NaN  
LgtRgr.skl -1.0      18077.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
LgtRgr.tfw  1000.0   18077.0  1.0                                                      NaN  
LgtRgr.skl -1.0      15000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
LgtRgr.tfw  1000.0   10000.0  10.0                                                     NaN  
LgtRgr.skl -1.0      10000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
LgtRgr.tfw  1000.0   10000.0  1.0                                                      NaN  
                              0.1                                                      NaN  
LgtRgr.skl -1.0      5000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     2000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     1000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
LgtRgr.tfw  100.0    1000.0   10.0                                                     NaN  
                              1.0                                                      NaN  
                              0.1                                                      NaN  
LgtRgr.skl -1.0      100.0   -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
In [13]:
%run img_utils.py
srchParamsDct = {
    'nObsFit' : [glbObsFitFtr.shape[0]],
#     'nObsFit' : [100, 1000, 5000, 10000, glbObsFitFtr.shape[0]]
    'nStepsTfw' : [1000],
#     'nStepsTfw' : [1000, 2000],    
    'lrnRateTfw' : [1.0, 10.0]
#     'lrnRateTfw' : [0.1, 1.0, 10.0]    
                }

jnk = mysearchParams(fitMdlLgtRgrTfw, srchParamsDct = srchParamsDct,
                     curResultsDf = glbMdlDf, 
               mode = 'displayonly', 
        sort_values    = ['nObsFit', 'accVld', 'logLossVld', 'duration'],
        sort_ascending = [False    , True    , False,        False],
                save_drop_cols = 'model',     
                save_filepathname = glbPickleFile['models'],
              lclXFit = glbXFit, lclYFit = glbYFit) 

# thsDf, thsObsVldRspPredProba, thsObsNewRspPredProba = fitMdlLgtRgrTfw(
#     glbXFit, glbYFit, 
#     nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5,
#     visualize = False, newObs = False, verbose = False)
mysearchParams: will run <function fitMdlLgtRgrTfw at 0x11d1c4ed8> with params:
Empty DataFrame
Columns: [nStepsTfw, nObsFit, lrnRateTfw]
Index: []
In [14]:
%run img_utils.py

glbMdlDf = mysearchParams(fitMdlLgtRgrTfw, srchParamsDct = srchParamsDct,
                     curResultsDf = glbMdlDf, 
               mode = 'run', 
        sort_values    = ['nObsFit', 'accVld', 'logLossVld', 'duration'],
        sort_ascending = [False    , False    , True,        True],       
                save_filepathname = glbPickleFile['models'],
                save_drop_cols = 'model',          
              lclXFit = glbXFit, lclYFit = glbYFit)
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:31: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:32: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
mysearchParams: running <function fitMdlLgtRgrTfw at 0x11994eed8> with params:
nStepsTfw      1000.0
nObsFit       18077.0
lrnRateTfw        1.0
Name: (-1.0, 100.0, -1.0), dtype: float64

Logistic Regression (TensorFlow): nObsFit:18077; nStepsTfw: 1000; lrnRateTfw:1.0000
  visualize: False; newObs: False; verbose: False
  logLoss at step   200: 3.8964 (109 secs)
  logLoss at step   400: 0.4543 (211 secs)
  logLoss at step   600: 0.2883 (313 secs)
  logLoss at step   800: 0.2021 (415 secs)
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:122: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:123: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:124: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:125: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  duration: 519 seconds
                                                                                 accVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  10.0       {u'accCls': [0.504132231405, 0.435146443515, 0...   
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  1.0        {u'accCls': [0.326446280992, 0.209205020921, 0...   
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  10.0       {u'accCls': [0.456611570248, 0.380753138075, 0...   
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  1.0        {u'accCls': [0.289256198347, 0.182008368201, 0...   
                              0.1        {u'accCls': [0.169421487603, 0.144351464435, 0...   
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN   
                     2000.0  -1.0                                                      NaN   
                     1000.0  -1.0                                                      NaN   
LgtRgr.tfw  100.0    1000.0   10.0       {u'accCls': [0.646694214876, 0.0564853556485, ...   
                              1.0        {u'accCls': [0.423553719008, 0.182008368201, 0...   
                              0.1        {u'accCls': [0.150826446281, 0.142259414226, 0...   
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN   

                                        bestFit  logLossVld  \
id         nStepsTfw nObsFit lrnRateTfw                       
LgtRgr.skl -1.0      22424.0 -1.0           NaN    0.018463   
LgtRgr.tfw  1000.0   18077.0  10.0          NaN   19.209913   
LgtRgr.skl -1.0      18077.0 -1.0          True    3.116799   
LgtRgr.tfw  1000.0   18077.0  1.0           NaN    9.475025   
LgtRgr.skl -1.0      15000.0 -1.0         False    3.085191   
LgtRgr.tfw  1000.0   10000.0  10.0          NaN   18.995890   
LgtRgr.skl -1.0      10000.0 -1.0         False    2.885114   
LgtRgr.tfw  1000.0   10000.0  1.0           NaN    9.502959   
                              0.1           NaN    8.912652   
LgtRgr.skl -1.0      5000.0  -1.0         False    2.610439   
                     2000.0  -1.0         False    2.498749   
                     1000.0  -1.0         False    2.539650   
LgtRgr.tfw  100.0    1000.0   10.0          NaN   24.274683   
                              1.0           NaN   14.714048   
                              0.1           NaN   12.600104   
LgtRgr.skl -1.0      100.0   -1.0         False    2.497940   

                                                                                   predNew  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   18077.0  10.0                                                     NaN   
LgtRgr.skl -1.0      18077.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   18077.0  1.0                                                      NaN   
LgtRgr.skl -1.0      15000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   10000.0  10.0                                                     NaN   
LgtRgr.skl -1.0      10000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   10000.0  1.0                                                      NaN   
                              0.1                                                      NaN   
LgtRgr.skl -1.0      5000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
                     2000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
                     1000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  100.0    1000.0   10.0                                                     NaN   
                              1.0                                                      NaN   
                              0.1                                                      NaN   
LgtRgr.skl -1.0      100.0   -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   

                                                                             logLossVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  10.0       {u'logLossCls': [1.72596805748, 1.7853875361, ...   
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  1.0        {u'logLossCls': [1.05645327144, 1.44372530176,...   
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  10.0       {u'logLossCls': [1.84540699409, 1.93896956479,...   
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  1.0        {u'logLossCls': [1.14059053163, 1.46723165909,...   
                              0.1        {u'logLossCls': [1.11536194976, 1.25718941188,...   
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN   
                     2000.0  -1.0                                                      NaN   
                     1000.0  -1.0                                                      NaN   
LgtRgr.tfw  100.0    1000.0   10.0       {u'logLossCls': [1.28781316316, 3.51519487621,...   
                              1.0        {u'logLossCls': [0.842354858019, 1.17531354279...   
                              0.1        {u'logLossCls': [1.47411393905, 2.08387227918,...   
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN   

                                           accVld  duration  \
id         nStepsTfw nObsFit lrnRateTfw                       
LgtRgr.skl -1.0      22424.0 -1.0        1.000000       874   
LgtRgr.tfw  1000.0   18077.0  10.0       0.375201       519   
LgtRgr.skl -1.0      18077.0 -1.0        0.343685       597   
LgtRgr.tfw  1000.0   18077.0  1.0        0.320221       519   
LgtRgr.skl -1.0      15000.0 -1.0        0.342535       514   
LgtRgr.tfw  1000.0   10000.0  10.0       0.384173       288   
LgtRgr.skl -1.0      10000.0 -1.0        0.358638       317   
LgtRgr.tfw  1000.0   10000.0  1.0        0.319991       301   
                              0.1        0.251208       295   
LgtRgr.skl -1.0      5000.0  -1.0        0.363699       150   
                     2000.0  -1.0        0.341385        55   
                     1000.0  -1.0        0.333333        28   
LgtRgr.tfw  100.0    1000.0   10.0       0.273522         7   
                              1.0        0.255809         5   
                              0.1        0.173453         5   
LgtRgr.skl -1.0      100.0   -1.0        0.307108         8   

                                                                                     model  
id         nStepsTfw nObsFit lrnRateTfw                                                     
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN  
LgtRgr.tfw  1000.0   18077.0  10.0       <tensorflow.python.client.session.Session obje...  
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN  
LgtRgr.tfw  1000.0   18077.0  1.0        <tensorflow.python.client.session.Session obje...  
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN  
LgtRgr.tfw  1000.0   10000.0  10.0                                                     NaN  
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN  
LgtRgr.tfw  1000.0   10000.0  1.0                                                      NaN  
                              0.1                                                      NaN  
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN  
                     2000.0  -1.0                                                      NaN  
                     1000.0  -1.0                                                      NaN  
LgtRgr.tfw  100.0    1000.0   10.0                                                     NaN  
                              1.0                                                      NaN  
                              0.1                                                      NaN  
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN  
Compressed pickle file: data/img_M_SFDD_ImgSz_64.pickle; size: 6 KB
In [16]:
lrn10Srs = glbMdlDf.ix[('LgtRgr.tfw',  1000.0,   18077.0,  10.0)]
# print lrn10Srs
lrn01Srs = glbMdlDf.ix[('LgtRgr.tfw',  1000.0,   18077.0,   1.0)]
# print lrn01Srs

print "lrn10Srs: accVld: %.4f" % (lrn10Srs['accVld'])
print "lrn01Srs: accVld: %.4f" % (lrn01Srs['accVld'])
print "lrn10-01: accVldDff: %.4f" % (lrn10Srs['accVld'] - lrn01Srs['accVld'])

print "lrn10Srs: accVldCls:"; print (lrn10Srs['accVldCls']['accCls'])
print "lrn01Srs: accVldCls:"; print (lrn01Srs['accVldCls']['accCls'])
print "lrn10-01: accVldClsDff:"; print (lrn10Srs['accVldCls']['accCls'] - 
                                        lrn01Srs['accVldCls']['accCls'])

print "\n"
print "lrn10Srs: logLossVld: %8.4f" % (lrn10Srs['logLossVld'])
print "lrn01Srs: logLossVld: %8.4f" % (lrn01Srs['logLossVld'])
print "lrn10-01: logLossVldDff: %8.4f" % (lrn10Srs['logLossVld'] - 
                                          lrn01Srs['logLossVld'])

print "lrn10Srs: logLossVldCls:"; print (lrn10Srs['logLossVldCls']['logLossCls'])
print "lrn01Srs: logLossVldCls:"; print (lrn01Srs['logLossVldCls']['logLossCls'])
print "lrn10-01: logLossVldClsDff:"; 
print (lrn10Srs['logLossVldCls']['logLossCls'] - 
       lrn01Srs['logLossVldCls']['logLossCls'])
lrn10Srs: accVld: 0.3752
lrn01Srs: accVld: 0.3202
lrn10-01: accVldDff: 0.0550
lrn10Srs: accVldCls:
[ 0.5041  0.4351  0.3805  0.1738  0.3965  0.2464  0.6749  0.5143  0.25
  0.1637]
lrn01Srs: accVldCls:
[ 0.3264  0.2092  0.2323  0.2144  0.3878  0.1588  0.6637  0.6286  0.2088
  0.219 ]
lrn10-01: accVldClsDff:
[ 0.1777  0.2259  0.1482 -0.0406  0.0087  0.0877  0.0113 -0.1143  0.0412
 -0.0553]


lrn10Srs: logLossVld:  19.2099
lrn01Srs: logLossVld:   9.4750
lrn10-01: logLossVldDff:   9.7349
lrn10Srs: logLossVldCls:
[ 1.726   1.7854  1.8043  2.6157  1.9122  2.3841  0.9763  1.0939  1.9704
  2.9416]
lrn01Srs: logLossVldCls:
[ 1.0565  1.4437  0.9226  1.0162  0.8034  1.5807  0.2146  0.2664  0.879
  1.2919]
lrn10-01: logLossVldClsDff:
[ 0.6695  0.3417  0.8817  1.5995  1.1088  0.8034  0.7616  0.8275  1.0915
  1.6497]
In [158]:
# Set value based on condition

# print glbMdlDf.ix[glbMdlDf['id'].str.contains('LogisticRegression.SGD.tf', 
#                                               na=False), 'id']
# glbMdlDf.ix[glbMdlDf['id'].str.contains('LogisticRegression.SGD.tf', 
#                                               na=False), 'id'] = 'LgtRgr.SGD.tf'
# print glbMdlDf.ix[glbMdlDf['id'].str.contains('LogisticRegression.SGD.tf', 
#                                               na=False), 'id']

def lclfixNanDf(df, column, default):
    print df[column].isnull()
    df.ix[df[column].isnull(), column] = default
    return(df)

# col = 'lrnRateTfw'; default = -1.0; 
# tmpMdlDf = lclfixNanDf(glbMdlDf, col, default); print tmpMdlDf[col]

# print glbMdlDf.ix[- glbMdlDf['nStepsTfw'].isnull(), ['id', 'nStepsTfw']]
# glbMdlDf.ix[- glbMdlDf['nStepsTfw'].isnull(), 'id'] = 'LogisticRegression.tf'
# print glbMdlDf.ix[- glbMdlDf['nStepsTfw'].isnull(), ['id', 'nStepsTfw']]

# print glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), ['nObsBtc', 'nObsFit']]
# # glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), 'nObsBtc'] = \
# #     glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), 'nObsFit']
# glbMdlDf['nObsBtc'] = glbMdlDf.apply(
#     lambda (row): row['nObsFit'] if pd.isnull(row['nObsBtc']) else row['nObsBtc'],
#                         axis = 1)
# print 'After:'    
# print glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), ['nObsBtc', 'nObsFit']]

# Change value
# tmpMdlDf = glbMdlDf
# print tmpMdlDf[(tmpMdlDf['id'].str.contains('LgtRgr.skl', na = False)) & 
#                (tmpMdlDf['nStepsTfw'] == 1.0)]
# print tmpMdlDf.ix[(tmpMdlDf['id'].str.contains('LgtRgr.skl', na = False)) & 
#                   (tmpMdlDf['nStepsTfw'] == 1.0), 'nStepsTfw']
# tmpMdlDf.ix[(tmpMdlDf['id'].str.contains('LgtRgr.skl', na = False)) & 
#                   (tmpMdlDf['nStepsTfw'] == 1.0), 'nStepsTfw'] = -1.0
# print 'After:'
# print tmpMdlDf

# Remove specific models
mask = (glbMdlDf['id'].str.contains('LgtRgr.tfw', na = False))
# mask = (glbMdlDf['id'].str.contains('LgtRgr.tfw', na = False)) & \
#        (glbMdlDf['nObsFit'] == 10000.0)
print mask
tmpMdlDf = glbMdlDf[~mask]
# print tmpMdlDf

# Remove dups
# print glbMdlDf.columns
# print (glbMdlDf['logLossVld'])
# print (glbMdlDf.index.duplicated())
# tmpMdlDf = glbMdlDf[~glbMdlDf.index.duplicated()]
# print (tmpMdlDf.index.duplicated())

glbMdlDf = tmpMdlDf
print 'After:'
print glbMdlDf
id          nStepsTfw  nObsFit  lrnRateTfw
LgtRgr.skl  -1.0       22424.0  -1.0          False
                       18077.0  -1.0          False
                       15000.0  -1.0          False
LgtRgr.tfw   1000.0    10000.0   10.0          True
LgtRgr.skl  -1.0       10000.0  -1.0          False
LgtRgr.tfw   1000.0    10000.0   1.0           True
                                 0.5           True
LgtRgr.skl  -1.0       5000.0   -1.0          False
                       2000.0   -1.0          False
                       1000.0   -1.0          False
LgtRgr.tfw   100.0     1000.0    10.0          True
                                 1.0           True
                                 0.5           True
LgtRgr.skl  -1.0       100.0    -1.0          False
Name: id, dtype: bool
After:
                                           accVld accVldCls bestFit  duration  \
id         nStepsTfw nObsFit lrnRateTfw                                         
LgtRgr.skl -1.0      22424.0 -1.0        1.000000       NaN     NaN       874   
                     18077.0 -1.0        0.343685       NaN    True       597   
                     15000.0 -1.0        0.342535       NaN   False       514   
                     10000.0 -1.0        0.358638       NaN   False       317   
                     5000.0  -1.0        0.363699       NaN   False       150   
                     2000.0  -1.0        0.341385       NaN   False        55   
                     1000.0  -1.0        0.333333       NaN   False        28   
                     100.0   -1.0        0.307108       NaN   False         8   

                                                 id  logLossVld logLossVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                         
LgtRgr.skl -1.0      22424.0 -1.0        LgtRgr.skl    0.018463           NaN   
                     18077.0 -1.0        LgtRgr.skl    3.116799           NaN   
                     15000.0 -1.0        LgtRgr.skl    3.085191           NaN   
                     10000.0 -1.0        LgtRgr.skl    2.885114           NaN   
                     5000.0  -1.0        LgtRgr.skl    2.610439           NaN   
                     2000.0  -1.0        LgtRgr.skl    2.498749           NaN   
                     1000.0  -1.0        LgtRgr.skl    2.539650           NaN   
                     100.0   -1.0        LgtRgr.skl    2.497940           NaN   

                                         lrnRateTfw model  nObsFit  nStepsTfw  \
id         nStepsTfw nObsFit lrnRateTfw                                         
LgtRgr.skl -1.0      22424.0 -1.0              -1.0   NaN  22424.0       -1.0   
                     18077.0 -1.0              -1.0   NaN  18077.0       -1.0   
                     15000.0 -1.0              -1.0   NaN  15000.0       -1.0   
                     10000.0 -1.0              -1.0   NaN  10000.0       -1.0   
                     5000.0  -1.0              -1.0   NaN   5000.0       -1.0   
                     2000.0  -1.0              -1.0   NaN   2000.0       -1.0   
                     1000.0  -1.0              -1.0   NaN   1000.0       -1.0   
                     100.0   -1.0              -1.0   NaN    100.0       -1.0   

                                                                                   predNew  
id         nStepsTfw nObsFit lrnRateTfw                                                     
LgtRgr.skl -1.0      22424.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     18077.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     15000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     10000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     5000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     2000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     1000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                     100.0   -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
In [26]:
myexportDf(glbMdlDf, 
           save_filepathname = glbPickleFile['models'],
           save_drop_cols = 'model'
          )
labels ['model'] not contained in axis
Compressed pickle file: data/img_M_SFDD_ImgSz_64.pickle; size: 3 KB
In [15]:
glbMdlDf['bestFit'] = False
glbMdlDf.ix[('LgtRgr.tfw', 1000.0, 18077.0, 10.0), 'bestFit'] = True
# LgtRgr.tfw  1000.0   18077.0  10.0
print glbMdlDf[list(set(glbMdlDf.columns) - set(srchParamsDct.keys()))]
# print glbMdlDf[glbMdlDf.nObsFit >= 10000][
#     list(set(glbMdlDf.columns) - set(srchParamsDct.keys()))]
                                                                                 accVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  10.0       {u'accCls': [0.504132231405, 0.435146443515, 0...   
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  1.0        {u'accCls': [0.326446280992, 0.209205020921, 0...   
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  10.0       {u'accCls': [0.456611570248, 0.380753138075, 0...   
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  1.0        {u'accCls': [0.289256198347, 0.182008368201, 0...   
                              0.1        {u'accCls': [0.169421487603, 0.144351464435, 0...   
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN   
                     2000.0  -1.0                                                      NaN   
                     1000.0  -1.0                                                      NaN   
LgtRgr.tfw  100.0    1000.0   10.0       {u'accCls': [0.646694214876, 0.0564853556485, ...   
                              1.0        {u'accCls': [0.423553719008, 0.182008368201, 0...   
                              0.1        {u'accCls': [0.150826446281, 0.142259414226, 0...   
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN   

                                        bestFit  logLossVld  \
id         nStepsTfw nObsFit lrnRateTfw                       
LgtRgr.skl -1.0      22424.0 -1.0         False    0.018463   
LgtRgr.tfw  1000.0   18077.0  10.0         True   19.209913   
LgtRgr.skl -1.0      18077.0 -1.0         False    3.116799   
LgtRgr.tfw  1000.0   18077.0  1.0         False    9.475025   
LgtRgr.skl -1.0      15000.0 -1.0         False    3.085191   
LgtRgr.tfw  1000.0   10000.0  10.0        False   18.995890   
LgtRgr.skl -1.0      10000.0 -1.0         False    2.885114   
LgtRgr.tfw  1000.0   10000.0  1.0         False    9.502959   
                              0.1         False    8.912652   
LgtRgr.skl -1.0      5000.0  -1.0         False    2.610439   
                     2000.0  -1.0         False    2.498749   
                     1000.0  -1.0         False    2.539650   
LgtRgr.tfw  100.0    1000.0   10.0        False   24.274683   
                              1.0         False   14.714048   
                              0.1         False   12.600104   
LgtRgr.skl -1.0      100.0   -1.0         False    2.497940   

                                                                                   predNew  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   18077.0  10.0                                                     NaN   
LgtRgr.skl -1.0      18077.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   18077.0  1.0                                                      NaN   
LgtRgr.skl -1.0      15000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   10000.0  10.0                                                     NaN   
LgtRgr.skl -1.0      10000.0 -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  1000.0   10000.0  1.0                                                      NaN   
                              0.1                                                      NaN   
LgtRgr.skl -1.0      5000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
                     2000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
                     1000.0  -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   
LgtRgr.tfw  100.0    1000.0   10.0                                                     NaN   
                              1.0                                                      NaN   
                              0.1                                                      NaN   
LgtRgr.skl -1.0      100.0   -1.0        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   

                                                                             logLossVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.skl -1.0      22424.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  10.0       {u'logLossCls': [1.72596805748, 1.7853875361, ...   
LgtRgr.skl -1.0      18077.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   18077.0  1.0        {u'logLossCls': [1.05645327144, 1.44372530176,...   
LgtRgr.skl -1.0      15000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  10.0       {u'logLossCls': [1.84540699409, 1.93896956479,...   
LgtRgr.skl -1.0      10000.0 -1.0                                                      NaN   
LgtRgr.tfw  1000.0   10000.0  1.0        {u'logLossCls': [1.14059053163, 1.46723165909,...   
                              0.1        {u'logLossCls': [1.11536194976, 1.25718941188,...   
LgtRgr.skl -1.0      5000.0  -1.0                                                      NaN   
                     2000.0  -1.0                                                      NaN   
                     1000.0  -1.0                                                      NaN   
LgtRgr.tfw  100.0    1000.0   10.0       {u'logLossCls': [1.28781316316, 3.51519487621,...   
                              1.0        {u'logLossCls': [0.842354858019, 1.17531354279...   
                              0.1        {u'logLossCls': [1.47411393905, 2.08387227918,...   
LgtRgr.skl -1.0      100.0   -1.0                                                      NaN   

                                           accVld  duration          id  
id         nStepsTfw nObsFit lrnRateTfw                                  
LgtRgr.skl -1.0      22424.0 -1.0        1.000000       874  LgtRgr.skl  
LgtRgr.tfw  1000.0   18077.0  10.0       0.375201       519  LgtRgr.tfw  
LgtRgr.skl -1.0      18077.0 -1.0        0.343685       597  LgtRgr.skl  
LgtRgr.tfw  1000.0   18077.0  1.0        0.320221       519  LgtRgr.tfw  
LgtRgr.skl -1.0      15000.0 -1.0        0.342535       514  LgtRgr.skl  
LgtRgr.tfw  1000.0   10000.0  10.0       0.384173       288  LgtRgr.tfw  
LgtRgr.skl -1.0      10000.0 -1.0        0.358638       317  LgtRgr.skl  
LgtRgr.tfw  1000.0   10000.0  1.0        0.319991       301  LgtRgr.tfw  
                              0.1        0.251208       295  LgtRgr.tfw  
LgtRgr.skl -1.0      5000.0  -1.0        0.363699       150  LgtRgr.skl  
                     2000.0  -1.0        0.341385        55  LgtRgr.skl  
                     1000.0  -1.0        0.333333        28  LgtRgr.skl  
LgtRgr.tfw  100.0    1000.0   10.0       0.273522         7  LgtRgr.tfw  
                              1.0        0.255809         5  LgtRgr.tfw  
                              0.1        0.173453         5  LgtRgr.tfw  
LgtRgr.skl -1.0      100.0   -1.0        0.307108         8  LgtRgr.skl  
/usr/local/lib/python2.7/site-packages/ipykernel/kernelbase.py:212: PerformanceWarning: indexing past lexsort depth may impact performance.
  handler(stream, idents, msg)
In [4]:
# # With gradient descent training, even this much data is prohibitive.
# # Subset the training data for faster turnaround.
# nObsFit = 10000

# graph = tf.Graph()
# with graph.as_default():

#   # Input data.
#   # Load the training, validation and test data into constants that are
#   # attached to the graph.
#   tfwObsFitFtr = tf.constant(glbXFit[:nObsFit, :])
#   tfwObsFitRsp = tf.constant(glbYFit[:nObsFit])
#   tfwObsVldFtr = tf.constant(glbXVld)
#   tfwObsNewFtr = tf.constant(glbXNew)
  
#   # Variables.
#   # These are the parameters that we are going to be training. The weight
#   # matrix will be initialized using random valued following a (truncated)
#   # normal distribution. The tfwB get initialized to zero.
#   tfwW = tf.Variable(
#     tf.truncated_normal([glbImg['size'] * glbImg['size'], glbRspClassN]), name = 'tfwW')
#   tfwB = tf.Variable(tf.zeros([glbRspClassN]), name = 'tfwB')
#   print(tfwW.initialized_value())
#   print(tfwB.initialized_value())
# #   print 'initial  tfwB:%s' % (np.vectorize("%.4e".__mod__)(tf.get_variable('tfwB')))
# #   print 'initial tfwW (first 5 only):' 
# #   for lblIx in xrange(glbRspClassN):
# #     print 'lblIx:%2d:%s'% (np.vectorize("%.4e".__mod__)(tfwW.value()[:5, lblIx]))
  
#   # Training computation.
#   # We multiply the inputs with the weight matrix, and add tfwB. We compute
#   # the softmax and cross-entropy (it's one operation in TensorFlow, because
#   # it's very common, and it can be optimized). We take the average of this
#   # cross-entropy across all training examples: that's our loss.
#   logits = tf.matmul(tfwObsFitFtr, tfwW) + tfwB
#   loss = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(logits, tfwObsFitRsp))
  
#   # Optimizer.
#   # We are going to find the minimum of this loss using gradient descent.
#   optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
#   # Predictions for the training, validation, and test data.
#   # These are not part of training, but merely here so that we can report
#   # accuracy_score figures as we train.
#   tfwObsTrnPred = tf.nn.softmax(logits)
#   tfwObsVldPred = tf.nn.softmax(
#     tf.matmul(tfwObsVldFtr, tfwW) + tfwB)
#   tfwObsNewPred = tf.nn.softmax(tf.matmul(tfwObsNewFtr, tfwW) + tfwB)
Tensor("Identity:0", shape=TensorShape([Dimension(784), Dimension(10)]), dtype=float32)
Tensor("Identity_1:0", shape=TensorShape([Dimension(10)]), dtype=float32)

Let's run this computation and iterate:

In [58]:
# nStepsTfw = 801

# def accuracy_score(predictions, labels):
#   return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
#           / predictions.shape[0])

# with tf.Session(graph=graph) as session:
#   # This is a one-time operation which ensures the parameters get initialized as
#   # we described in the graph: random tfwW for the matrix, zeros for the
#   # tfwB. 
#   tf.initialize_all_variables().run()
#   print('Initialized')
#   for step in range(nStepsTfw):
#     # Run the computations. We tell .run() that we want to run the optimizer,
#     # and get the loss value and the training predictions returned as numpy
#     # arrays.
#     _, l, predictions = session.run([optimizer, loss, tfwObsTrnPred])
#     if (step % 100 == 0):
#       print('Loss at step %d: %f' % (step, l))
#       print('Training accuracy_score: %.1f%%' % accuracy_score(
#         predictions, glbYFit[:nObsFit, :]))
#       # Calling .eval() on tfwObsVldPred is basically like calling run(), but
#       # just to get that one numpy array. Note that it recomputes all its graph
#       # dependencies.
#       print('Validation accuracy_score: %.1f%%' % accuracy_score(
#         tfwObsVldPred.eval(), glbYVld))
#   print('Test accuracy_score: %.1f%%' % accuracy_score(tfwObsNewPred.eval(), glbYNew))
In [22]:
robjects.pandas2ri.activate()
pltRDf = robjects.conversion.py2ri(glbMdlDf)
# print(pltRDf)
pltRFn = robjects.r("""
    source('~/Dropbox/datascience/R/myplot.R')
    function(RDf, filename) {
        mypltModelStats(RDf, c('accVld', 'logLossVld', 'duration'), 
            dim = c('nObsFit', 'id', 'nStepsTfw', 'lrnRateTfw'), 
                scaleXFn = NULL, 
                #highLightIx = which.min(RDf$logLossVld),
                highLightIx = which(RDf$bestFit == 'TRUE'),                
            title = NULL, 
            fileName = filename)
    }                        
""")    
pltRFn(pltRDf, 'img_03_fit_lgtRgr_Tfw_SFDD_glbMdlDf.png')

pltRFn = robjects.r("""
    source('~/Dropbox/datascience/R/myplot.R')
    function(RDf, filename) {
        mypltModelStats(RDf, c('accVld', 'logLossVld'), 
            dim = c('nObsFit', 'id', 'nStepsTfw', 'lrnRateTfw'), 
                scaleXFn = NULL, 
                #highLightIx = which.min(RDf$logLossVld),
                highLightIx = which(RDf$bestFit == 'TRUE'),                
            title = NULL, 
            fileName = filename)
    }                        
""")    
pltRFn(pltRDf, 'img_03_fit_lgtRgr_Tfw_SFDD_glbMdlDf_logLossVld.png')

pltRFn = robjects.r("""
    source('~/Dropbox/datascience/R/myplot.R')
    function(RDf, filename) {
        mypltModelStats(RDf, c('accVld'), 
            dim = c('nObsFit', 'id', 'nStepsTfw', 'lrnRateTfw'), 
                scaleXFn = NULL, 
                #highLightIx = which.min(RDf$logLossVld),
                highLightIx = which(RDf$bestFit == 'TRUE'),                
            title = NULL, 
            fileName = filename)
    }                        
""")    
pltRFn(pltRDf, 'img_03_fit_lgtRgr_Tfw_SFDD_glbMdlDf_accVld.png')
Out[22]:
<ListVector - Python:0x144af9098 / R:0x7ffce67838a8>
[DataF..., ListV..., Envir..., ..., ListV..., Envir..., ListV...]
<ListVector - Python:0x144af9098 / R:0x7ffce67838a8>
[DataF..., ListV..., Envir..., ..., ListV..., Envir..., ListV...]
<ListVector - Python:0x144af9098 / R:0x7ffce67838a8>
[DataF..., ListV..., Envir..., ..., ListV..., Envir..., ListV...]
  scales: <class 'rpy2.robjects.environments.Environment'>
  <Environment - Python:0x144abac68 / R:0x7ffcd4279898>
  ...
<ListVector - Python:0x144af9098 / R:0x7ffce67838a8>
[DataF..., ListV..., Envir..., ..., ListV..., Envir..., ListV...]
  layers: <class 'rpy2.robjects.environments.Environment'>
  <Environment - Python:0x144adaef0 / R:0x7ffcd40465b8>
<ListVector - Python:0x144af9098 / R:0x7ffce67838a8>
[DataF..., ListV..., Envir..., ..., ListV..., Envir..., ListV...]
In [21]:
glbMdlDf.to_csv('img_03_fit_lgtRgr_Tfw_SFDD_glbMdlDf.csv')

Fit selected model to glbObsFit

In [16]:
selMdlSrs = glbMdlDf[glbMdlDf['bestFit']]
print selMdlSrs
                                           accVld  \
id         nStepsTfw nObsFit lrnRateTfw             
LgtRgr.tfw 1000.0    18077.0 10.0        0.375201   

                                                                                 accVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.tfw 1000.0    18077.0 10.0        {u'accCls': [0.504132231405, 0.435146443515, 0...   

                                        bestFit  duration          id  \
id         nStepsTfw nObsFit lrnRateTfw                                 
LgtRgr.tfw 1000.0    18077.0 10.0          True       519  LgtRgr.tfw   

                                         logLossVld  \
id         nStepsTfw nObsFit lrnRateTfw               
LgtRgr.tfw 1000.0    18077.0 10.0         19.209913   

                                                                             logLossVldCls  \
id         nStepsTfw nObsFit lrnRateTfw                                                      
LgtRgr.tfw 1000.0    18077.0 10.0        {u'logLossCls': [1.72596805748, 1.7853875361, ...   

                                         lrnRateTfw  nObsFit  nStepsTfw  \
id         nStepsTfw nObsFit lrnRateTfw                                   
LgtRgr.tfw 1000.0    18077.0 10.0              10.0  18077.0     1000.0   

                                        predNew  
id         nStepsTfw nObsFit lrnRateTfw          
LgtRgr.tfw 1000.0    18077.0 10.0           NaN  
In [17]:
selMdlDf, selYVldPby, selYNewPby = fitMdlLgtRgrTfw(
    glbXFit, glbYFit, 
    nObsFit = glbXFit.shape[0], 
    nStepsTfw = selMdlSrs['nStepsTfw'][0], 
    lrnRateTfw = selMdlSrs['lrnRateTfw'][0], 
    visualize = True, newObs = True, verbose = True)
Logistic Regression (TensorFlow): nObsFit:18077; nStepsTfw: 1000; lrnRateTfw:10.0000
  visualize: True; newObs: True; verbose: True
('  tfwW:', <tf.Tensor 'Identity:0' shape=(4096, 10) dtype=float32>)
('  tfwB:', <tf.Tensor 'Identity_1:0' shape=(10,) dtype=float32>)
  Initialized
  logLoss at step   200: 44.2288 (106 secs)
  logLoss at step   400: 1.1421 (206 secs)
  logLoss at step   600: 0.5688 (306 secs)
  logLoss at step   800: 0.3106 (405 secs)

  Fit accuracy:0.9914
  Fit  logLoss:0.1189

  Vld accuracy:0.3752
[ 0.5041  0.4351  0.3805  0.1738  0.3965  0.2464  0.6749  0.5143  0.25
  0.1637]
[[244  21  36   2  98   0  23  47   5   8]
 [  4 208   9   0   2   0   4 241   6   4]
 [  0   2 172   0   0   1 147 120   9   1]
 [  3   2  79  77 183  41   2   0  39  17]
 [ 24   3  56  42 182  31  15  33  37  36]
 [ 15   9   7   0  44 104 218   4  21   0]
 [  1  16  25   3   6   0 299  78  13   2]
 [  0   8  15   0   0   0  81 180  66   0]
 [  7   7  53  15   5   0 138  46  91   2]
 [ 13   0  84 100  54   5   7 112   3  74]]
  Vld  logLoss:19.2099
[ 1.726   1.7854  1.8043  2.6157  1.9122  2.3841  0.9763  1.0939  1.9704
  2.9416]


max Pby for cls: c0; desc: normal driving; proba: 1.0000; nObs: 227
  img_62023.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c0; desc: normal driving; proba: 0.5098; nObs: 1
  img_49323.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.5098  0.      0.      0.      0.      0.      0.4321  0.0581  0.      0.    ]
  next best class: drinking


max Pby for cls: c1; desc: texting - right; proba: 1.0000; nObs: 197
  img_53130.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c1; desc: texting - right; proba: 0.4848; nObs: 1
  img_42578.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.3915  0.4848  0.      0.      0.      0.      0.1237  0.      0.      0.    ]
  next best class: normal driving


max Pby for cls: c2; desc: talking on the phone - right; proba: 1.0000; nObs: 405
  img_79556.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c2; desc: talking on the phone - right; proba: 0.3804; nObs: 1
  img_68054.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.3804  0.      0.336   0.      0.      0.2835  0.      0.    ]
  next best class: talking on the phone - left


max Pby for cls: c3; desc: texting - left; proba: 1.0000; nObs: 203
  img_79904.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c3; desc: texting - left; proba: 0.8969; nObs: 1
  img_1106.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.0936  0.      0.      0.8969  0.      0.      0.0095  0.      0.      0.    ]
  next best class: normal driving


max Pby for cls: c4; desc: talking on the phone - left; proba: 1.0000; nObs: 467
  img_18343.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
min Pby for cls: c4; desc: talking on the phone - left; proba: 0.5164; nObs: 1
  img_35848.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.      0.      0.5164  0.      0.      0.      0.
  0.4836]
  next best class: talking to passenger


max Pby for cls: c5; desc: operating the radio; proba: 1.0000; nObs: 134
  img_15928.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
min Pby for cls: c5; desc: operating the radio; proba: 0.5088; nObs: 1
  img_73704.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.      0.      0.      0.5088  0.      0.      0.4912
  0.    ]
  next best class: hair and makeup


max Pby for cls: c6; desc: drinking; proba: 1.0000; nObs: 748
  img_29251.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
min Pby for cls: c6; desc: drinking; proba: 0.4823; nObs: 1
  img_61731.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.0358  0.      0.      0.      0.4823  0.4819  0.      0.    ]
  next best class: reaching behind


max Pby for cls: c7; desc: reaching behind; proba: 1.0000; nObs: 646
  img_89322.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
min Pby for cls: c7; desc: reaching behind; proba: 0.5852; nObs: 1
  img_57279.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.4148  0.      0.      0.      0.      0.5852  0.      0.    ]
  next best class: talking on the phone - right


max Pby for cls: c8; desc: hair and makeup; proba: 1.0000; nObs: 205
  img_31586.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]
min Pby for cls: c8; desc: hair and makeup; proba: 0.5303; nObs: 1
  img_50218.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.4548  0.      0.      0.      0.      0.      0.015   0.      0.5303
  0.    ]
  next best class: normal driving


max Pby for cls: c9; desc: talking to passenger; proba: 1.0000; nObs: 85
  img_35236.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
min Pby for cls: c9; desc: talking to passenger; proba: 0.5486; nObs: 1
  img_33399.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.      0.      0.4514  0.      0.      0.      0.
  0.5486]
  next best class: talking on the phone - left
  predicting 79726 new obs...
    @  551 secs: obsIx:     0
    @  552 secs: obsIx:  8000
    @  557 secs: obsIx: 40000


max Pby for cls: c0; desc: normal driving; proba: 1.0000; nObs: 4137
  img_100.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c0; desc: normal driving; proba: 0.4154; nObs: 1
  img_93716.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.4154  0.      0.      0.      0.      0.      0.      0.      0.3153
  0.2693]
  next best class: hair and makeup


max Pby for cls: c1; desc: texting - right; proba: 1.0000; nObs: 4845
  img_100001.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c1; desc: texting - right; proba: 0.4962; nObs: 1
  img_74063.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.4962  0.      0.      0.      0.      0.4737  0.      0.0302
  0.    ]
  next best class: drinking


max Pby for cls: c2; desc: talking on the phone - right; proba: 1.0000; nObs: 10247
  img_1.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c2; desc: talking on the phone - right; proba: 0.4067; nObs: 1
  img_79284.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.194   0.      0.4067  0.      0.      0.3993  0.      0.      0.      0.    ]
  next best class: operating the radio


max Pby for cls: c3; desc: texting - left; proba: 1.0000; nObs: 4237
  img_100011.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
min Pby for cls: c3; desc: texting - left; proba: 0.4187; nObs: 1
  img_25992.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.      0.4187  0.      0.3735  0.2078  0.      0.      0.    ]
  next best class: operating the radio


max Pby for cls: c4; desc: talking on the phone - left; proba: 1.0000; nObs: 4865
  img_100002.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
min Pby for cls: c4; desc: talking on the phone - left; proba: 0.3965; nObs: 1
  img_74899.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.      0.      0.3965  0.      0.      0.      0.2966
  0.3068]
  next best class: talking to passenger


max Pby for cls: c5; desc: operating the radio; proba: 1.0000; nObs: 4766
  img_100023.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
min Pby for cls: c5; desc: operating the radio; proba: 0.5008; nObs: 1
  img_10497.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.4992  0.      0.      0.      0.      0.5008  0.      0.      0.      0.    ]
  next best class: normal driving


max Pby for cls: c6; desc: drinking; proba: 1.0000; nObs: 4439
  img_100004.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
min Pby for cls: c6; desc: drinking; proba: 0.4996; nObs: 1
  img_70494.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.0041  0.      0.      0.      0.      0.4996  0.      0.4963
  0.    ]
  next best class: hair and makeup


max Pby for cls: c7; desc: reaching behind; proba: 1.0000; nObs: 8032
  img_100005.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
min Pby for cls: c7; desc: reaching behind; proba: 0.4066; nObs: 1
  img_37771.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.3606  0.      0.      0.      0.      0.4066  0.2328
  0.    ]
  next best class: talking on the phone - right


max Pby for cls: c8; desc: hair and makeup; proba: 1.0000; nObs: 9435
  img_100007.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]
min Pby for cls: c8; desc: hair and makeup; proba: 0.4542; nObs: 1
  img_47457.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.2745  0.      0.      0.      0.      0.2713  0.      0.4542
  0.    ]
  next best class: texting - right


max Pby for cls: c9; desc: talking to passenger; proba: 1.0000; nObs: 3277
  img_100059.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
min Pby for cls: c9; desc: talking to passenger; proba: 0.4916; nObs: 1
  img_60087.jpg:
  plot_occlusion:
  display_weight:
  Proba:
[ 0.      0.      0.      0.4787  0.0297  0.      0.      0.      0.
  0.4916]
  next best class: texting - left

  New prediction knts:
{'kntCls': (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([ 5912,  6862, 13313,  5473,  6331,  5926,  6398, 11066, 13115,  5330]))}
  duration: 605 seconds
In [64]:
finMdlDf, glbYNewPredProba = fitMdlLgtRgrTfw(glbXTrn, glbYTrn, 
                        nObsFit = glbXTrn.shape[0], 
                        nStepsTfw = selMdlSrs['nStepsTfw'][0], 
                     verbose = True)
# thsDf, thsObsNewRspPredProba = fitMdlLgtRgrTfw(glbXFit, glbYFit, 
#                         nObsFit = 100, nStepsTfw = 10, 
#                         verbose = True)
Logistic Regression (TensorFlow): nObsFit:22424; nStepsTfw:20000
weights:
Tensor("Identity:0", shape=(1024, 10), dtype=float32)
 biases:
Tensor("Identity_1:0", shape=(10,), dtype=float32)
Initialized
Loss at step 0: 11.271506
Loss at step 2: 7.000206
Loss at step 4: 5.870358
Loss at step 6: 5.496468
Loss at step 8: 5.219804
Loss at step 20: 4.457797
Loss at step 40: 3.789667
Loss at step 60: 3.330005
Loss at step 80: 3.100190
Loss at step 200: 2.088403
Loss at step 400: 1.443837
Loss at step 600: 1.048325
Loss at step 800: 0.849781
Loss at step 2000: 0.408565
Loss at step 4000: 0.266258
Loss at step 6000: 0.206249
Loss at step 8000: 0.171473
accFit:0.9824; logLossFit:0.0923
accVld:0.9804; logLossVld:0.1009

  New prediction knts:
{'clsKnt': (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([ 6370, 10592,  7958,  7524,  6092,  7638,  6433,  7881, 12973,  6265]))}
  elapsed: 3533 seconds
In [82]:
finMdlDf = mdlFinDf
In [86]:
print finMdlDf
print finMdlDf['nStepsTfw']
                      id  nObsFit  \
0  LogisticRegression.tf    22424   

                                           nStepsTfw    accVld  logLossVld  \
0  id                          nObsFit  nStepsTfw...  0.980375    0.100946   

                                             predNew  \
0  {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...   

                                               model  elapsedSecs  
0  <tensorflow.python.framework.ops.Graph object ...         3533  
0    id                          nObsFit  nStepsTfw...
Name: nStepsTfw, dtype: object
In [87]:
finMdlDf['nStepsTfw'] = selMdlSrs['nStepsTfw'][0]
print finMdlDf['nStepsTfw']
0    20000
Name: nStepsTfw, dtype: float64
In [65]:
glbMdlDf = glbMdlDf.append(mdlFinDf)
In [92]:
print glbMdlDf['nObsFit']
print glbMdlDf.shape
print glbMdlDf.ix[23, 'nStepsTfw']
glbMdlDf.ix[23, 'nStepsTfw'] = selMdlSrs['nStepsTfw'][0]
print glbMdlDf.ix[23, 'nStepsTfw']
(LogisticRegression.sklearn, 22424, nan)        22424
(LogisticRegression.sklearn, 17940, 20000.0)    17940
(LogisticRegression.sklearn, 10000, 20000.0)    10000
(LogisticRegression.sklearn, 17940, nan)        17940
(LogisticRegression.sklearn, 17940, 10000.0)    17940
(LogisticRegression.sklearn, 10000, 10000.0)    10000
(LogisticRegression.sklearn, 10000, nan)        10000
(LogisticRegression.sklearn, 5000, nan)          5000
(LogisticRegression.sklearn, 17940, 1000.0)     17940
(LogisticRegression.sklearn, 10000, 1000.0)     10000
(LogisticRegression.sklearn, 1000, nan)          1000
(LogisticRegression.sklearn, 1000, 10000.0)      1000
(LogisticRegression.sklearn, 1000, 20000.0)      1000
(LogisticRegression.sklearn, 1000, 1000.0)       1000
(LogisticRegression.sklearn, 100, nan)            100
(LogisticRegression.sklearn, 10000, 100.0)      10000
(LogisticRegression.sklearn, 17940, 100.0)      17940
(LogisticRegression.sklearn, 1000, 100.0)        1000
(LogisticRegression.sklearn, 100, 1000.0)         100
(LogisticRegression.sklearn, 100, 100.0)          100
(LogisticRegression.sklearn, 100, 10000.0)        100
(LogisticRegression.sklearn, 100, 20000.0)        100
(LogisticRegression.sklearn, 100, 10.0)           100
0                                               22424
Name: nObsFit, dtype: int64
(24, 9)
id                          nObsFit  nStepsTfw
LogisticRegression.sklearn  17940    20000        20000
Name: nStepsTfw, dtype: float64
20000.0
In [94]:
#print glbMdlDf['nStepsTfw']
# tmpMdlDf = (glbMdlDf
#                 .set_index(['nObsFit', 'nStepsTfw'], 
#                            drop = False)
#                 )
# tmpMdlDf = (glbMdlDf
#                 .set_index(['id', 'nObsFit', 'nStepsTfw'], 
#                            drop = False)
#                 .sort_values(['logLossVld', 'accVld'], ascending = 
#                              ['True'      , 'False' ])
#                 )
# savMdlDf = glbMdlDf
# glbMdlDf = tmpMdlDf
# print glbMdlDf
glbMdlDf = mydspMdls(glbMdlDf)
# mysaveMdls()
                                                accVld bstFit  elapsedSecs  \
id                         nObsFit nStepsTfw                                 
LogisticRegression.tf      22424   20000      0.980375    NaN         3533   
LogisticRegression.sklearn 22424   NaN        0.983943  False          238   
                           17940   20000      0.000000   True         2898   
                           10000   20000      0.000000  False         1696   
                           17940   NaN        0.969670  False          166   
                                   10000      0.000000  False         1411   
                           10000   10000      0.000000  False          801   
                                   NaN        0.956512  False           83   
                           5000    NaN        0.936664  False           37   
                           17940   1000       0.000000  False          148   
                           10000   1000       0.000000  False           85   
                           1000    NaN        0.836084  False            6   
                                   10000      0.000000  False          187   
                                   20000      0.000000  False          371   
                                   1000       0.000000  False           23   
                           100     NaN        0.328724  False            1   
                           10000   100        0.000000  False           14   
                           17940   100        0.000000  False           21   
                           1000    100        0.000000  False            8   
                           100     1000       0.000000  False           18   
                                   100        0.000000  False            6   
                                   10000      0.000000  False          130   
                                   20000      0.000000  False          260   
                                   10         0.000000  False            5   

                                                                      id  \
id                         nObsFit nStepsTfw                               
LogisticRegression.tf      22424   20000           LogisticRegression.tf   
LogisticRegression.sklearn 22424   NaN        LogisticRegression.sklearn   
                           17940   20000      LogisticRegression.sklearn   
                           10000   20000      LogisticRegression.sklearn   
                           17940   NaN        LogisticRegression.sklearn   
                                   10000      LogisticRegression.sklearn   
                           10000   10000      LogisticRegression.sklearn   
                                   NaN        LogisticRegression.sklearn   
                           5000    NaN        LogisticRegression.sklearn   
                           17940   1000       LogisticRegression.sklearn   
                           10000   1000       LogisticRegression.sklearn   
                           1000    NaN        LogisticRegression.sklearn   
                                   10000      LogisticRegression.sklearn   
                                   20000      LogisticRegression.sklearn   
                                   1000       LogisticRegression.sklearn   
                           100     NaN        LogisticRegression.sklearn   
                           10000   100        LogisticRegression.sklearn   
                           17940   100        LogisticRegression.sklearn   
                           1000    100        LogisticRegression.sklearn   
                           100     1000       LogisticRegression.sklearn   
                                   100        LogisticRegression.sklearn   
                                   10000      LogisticRegression.sklearn   
                                   20000      LogisticRegression.sklearn   
                                   10         LogisticRegression.sklearn   

                                              logLossVld  \
id                         nObsFit nStepsTfw               
LogisticRegression.tf      22424   20000        0.100946   
LogisticRegression.sklearn 22424   NaN          0.150835   
                           17940   20000        0.160944   
                           10000   20000        0.194986   
                           17940   NaN          0.201309   
                                   10000        0.217923   
                           10000   10000        0.246452   
                                   NaN          0.262027   
                           5000    NaN          0.356476   
                           17940   1000         0.729988   
                           10000   1000         0.747099   
                           1000    NaN          0.788946   
                                   10000        0.808008   
                                   20000        0.820409   
                                   1000         1.044522   
                           100     NaN          2.299075   
                           10000   100          2.879257   
                           17940   100          2.893371   
                           1000    100          3.004558   
                           100     1000         4.426427   
                                   100          4.571001   
                                   10000        4.879932   
                                   20000        5.052688   
                                   10           6.019710   

                                                                                          model  \
id                         nObsFit nStepsTfw                                                      
LogisticRegression.tf      22424   20000      <tensorflow.python.framework.ops.Graph object ...   
LogisticRegression.sklearn 22424   NaN                                                      NaN   
                           17940   20000                                                    NaN   
                           10000   20000                                                    NaN   
                           17940   NaN                                                      NaN   
                                   10000                                                    NaN   
                           10000   10000                                                    NaN   
                                   NaN                                                      NaN   
                           5000    NaN                                                      NaN   
                           17940   1000                                                     NaN   
                           10000   1000                                                     NaN   
                           1000    NaN                                                      NaN   
                                   10000                                                    NaN   
                                   20000                                                    NaN   
                                   1000                                                     NaN   
                           100     NaN                                                      NaN   
                           10000   100                                                      NaN   
                           17940   100                                                      NaN   
                           1000    100                                                      NaN   
                           100     1000                                                     NaN   
                                   100                                                      NaN   
                                   10000                                                    NaN   
                                   20000                                                    NaN   
                                   10                                                       NaN   

                                              nObsFit nStepsTfw  \
id                         nObsFit nStepsTfw                      
LogisticRegression.tf      22424   20000        22424     20000   
LogisticRegression.sklearn 22424   NaN          22424       NaN   
                           17940   20000        17940     20000   
                           10000   20000        10000     20000   
                           17940   NaN          17940       NaN   
                                   10000        17940     10000   
                           10000   10000        10000     10000   
                                   NaN          10000       NaN   
                           5000    NaN           5000       NaN   
                           17940   1000         17940      1000   
                           10000   1000         10000      1000   
                           1000    NaN           1000       NaN   
                                   10000         1000     10000   
                                   20000         1000     20000   
                                   1000          1000      1000   
                           100     NaN            100       NaN   
                           10000   100          10000       100   
                           17940   100          17940       100   
                           1000    100           1000       100   
                           100     1000           100      1000   
                                   100            100       100   
                                   10000          100     10000   
                                   20000          100     20000   
                                   10             100        10   

                                                                                        predNew  
id                         nObsFit nStepsTfw                                                     
LogisticRegression.tf      22424   20000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
LogisticRegression.sklearn 22424   NaN        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           17940   20000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           10000   20000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           17940   NaN        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   10000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           10000   10000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   NaN        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           5000    NaN        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           17940   1000       {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           10000   1000       {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           1000    NaN        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   10000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   20000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   1000       {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           100     NaN        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           10000   100        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           17940   100        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           1000    100        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                           100     1000       {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   100        {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   10000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   20000      {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
                                   10         {u'clsKnt': ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [...  
In [98]:
mysaveMdls()
Compressed pickle file: data/img_M_SFDD_ImgSz_32.pickle; size: 8 KB

Generate the model plots by scrolling up

Predict ObsNew using finMdlDf['model']

In [124]:
# print np.array_str(glbYNewPredProba[:5, :], 
#                    precision = 4, suppress_small = True)
glbYNewPred = np.argmax(glbYNewPredProba, axis = 1)
# print glbYNewPred[:5]
print np.unique(glbYNewPred, return_counts = True)
#print glbYTrn[:5, :]
tmpObsTrnRsp = np.argmax(glbYTrn, axis = 1)
# print tmpObsTrnRsp[:5]
print np.unique(glbYNewPred, return_counts = True)[1] * 1.0 / \
      np.unique(tmpObsTrnRsp, return_counts = True)[1]
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([ 6370, 10592,  7958,  7524,  6092,  7638,  6433,  7881, 12973,  6265]))
[ 2.55926075  4.67225408  3.43461372  3.20716113  2.61908856  3.30363322
  2.76688172  3.93656344  6.78859236  2.9426961 ]
In [130]:
print glbImg
{'color': False, 'pxlDepth': 255.0, 'size': 32}
In [131]:
for clsIx, cls in enumerate(glbRspClass):
    clsMsk = glbYNewPred == clsIx
    clsObsNewRspPredProba = glbYNewPredProba[clsMsk, :]
    clsObsNewIdn = [glbINew[ixMsk] for ixMsk in xrange(len(glbINew)) \
                    if clsMsk[ixMsk]]
    print '\n'
    
    maxClsProba = np.max(clsObsNewRspPredProba[:, clsIx])
    maxObsNewRspPredProba = clsObsNewRspPredProba[:, clsIx] == maxClsProba
    print 'Max Proba for cls: %s; desc: %s; proba: %0.4f; nObsNew: %d' % \
        (cls, glbRspClassDesc[cls], maxClsProba, maxObsNewRspPredProba.sum())
    imgFilePth = os.getcwd() + '/data/' + glbDataFile['newFoldersPth'] + '/' + \
                    clsObsNewIdn[np.argmax(clsObsNewRspPredProba[:, clsIx])]
    print '  %s:' % imgFilePth
    jpgfile = Image(imgFilePth, format = 'jpg', 
                        width = glbImg['size'] * 4, height = glbImg['size'] * 4)
    display(jpgfile)         
    print '  Proba:'; 
    print np.array_str(clsObsNewRspPredProba[np.argmax(clsObsNewRspPredProba[:, clsIx]), :],
                       precision=4, suppress_small=True)
        
    minClsProba = np.min(clsObsNewRspPredProba[:, clsIx])
    minObsNewRspPredProba = clsObsNewRspPredProba[:, clsIx] == minClsProba
    print 'Min Proba for cls: %s; desc: %s; proba: %0.4f; nObsNew: %d' % \
        (cls, glbRspClassDesc[cls], minClsProba, minObsNewRspPredProba.sum())
    imgFilePth = os.getcwd() + '/data/' + glbDataFile['newFoldersPth'] + '/' + \
                    clsObsNewIdn[np.argmin(clsObsNewRspPredProba[:, clsIx])]
    print '  %s:' % imgFilePth
    jpgfile = Image(imgFilePth, format = 'jpg', 
                        width = glbImg['size'] * 4, height = glbImg['size'] * 4)
    display(jpgfile)
    print '  Proba:'; 
    print np.array_str(clsObsNewRspPredProba[np.argmin(clsObsNewRspPredProba[:, clsIx]), :],
                       precision=4, suppress_small=True)
    thsObsNewRspPredProba = clsObsNewRspPredProba[np.argmin(clsObsNewRspPredProba[:, clsIx]), :]
    thsObsNewRspPredProba[clsIx] = 0
    print '  next best class: %s' % \
        (glbRspClassDesc[glbRspClass[np.argmax(thsObsNewRspPredProba)]])

Max Proba for cls: c0; desc: normal driving; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_22281.jpg:
  Proba:
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Min Proba for cls: c0; desc: normal driving; proba: 0.2345; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_48839.jpg:
  Proba:
[ 0.2345  0.0771  0.2317  0.2207  0.0579  0.001   0.0613  0.0001  0.0129
  0.1029]
  next best class: talking on the phone - right


Max Proba for cls: c1; desc: texting - right; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_91185.jpg:
  Proba:
[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
Min Proba for cls: c1; desc: texting - right; proba: 0.2005; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_490.jpg:
  Proba:
[ 0.0236  0.2005  0.      0.128   0.1182  0.1661  0.1558  0.0005  0.1783
  0.0293]
  next best class: hair and makeup


Max Proba for cls: c2; desc: talking on the phone - right; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_45248.jpg:
  Proba:
[ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
Min Proba for cls: c2; desc: talking on the phone - right; proba: 0.2234; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_53769.jpg:
  Proba:
[ 0.1674  0.0464  0.2234  0.0081  0.146   0.0048  0.1542  0.0998  0.007
  0.1426]
  next best class: normal driving


Max Proba for cls: c3; desc: texting - left; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_65853.jpg:
  Proba:
[ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
Min Proba for cls: c3; desc: texting - left; proba: 0.2278; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_57360.jpg:
  Proba:
[ 0.0099  0.1661  0.1746  0.2278  0.1476  0.1391  0.1026  0.      0.0322
  0.    ]
  next best class: talking on the phone - right


Max Proba for cls: c4; desc: talking on the phone - left; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_40761.jpg:
  Proba:
[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
Min Proba for cls: c4; desc: talking on the phone - left; proba: 0.2027; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_19776.jpg:
  Proba:
[ 0.0891  0.0716  0.1568  0.008   0.2027  0.0983  0.1471  0.      0.0919
  0.1345]
  next best class: talking on the phone - right


Max Proba for cls: c5; desc: operating the radio; proba: 1.0000; nObsNew: 107
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_100652.jpg:
  Proba:
[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
Min Proba for cls: c5; desc: operating the radio; proba: 0.2257; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_51092.jpg:
  Proba:
[ 0.0147  0.0629  0.0102  0.1632  0.0045  0.2257  0.2233  0.      0.1055
  0.19  ]
  next best class: drinking


Max Proba for cls: c6; desc: drinking; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_89962.jpg:
  Proba:
[ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
Min Proba for cls: c6; desc: drinking; proba: 0.2073; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_13086.jpg:
  Proba:
[ 0.1666  0.1501  0.1129  0.1801  0.0016  0.      0.2073  0.0907  0.0216
  0.0691]
  next best class: texting - left


Max Proba for cls: c7; desc: reaching behind; proba: 1.0000; nObsNew: 35
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_100236.jpg:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
Min Proba for cls: c7; desc: reaching behind; proba: 0.2259; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_84684.jpg:
  Proba:
[ 0.      0.2061  0.1067  0.2033  0.0001  0.0011  0.177   0.2259  0.0638
  0.0158]
  next best class: texting - right


Max Proba for cls: c8; desc: hair and makeup; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_46885.jpg:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]
Min Proba for cls: c8; desc: hair and makeup; proba: 0.2280; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_54779.jpg:
  Proba:
[ 0.0025  0.1558  0.0737  0.002   0.1987  0.      0.1928  0.0064  0.228
  0.1401]
  next best class: talking on the phone - left


Max Proba for cls: c9; desc: talking to passenger; proba: 1.0000; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_32829.jpg:
  Proba:
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
Min Proba for cls: c9; desc: talking to passenger; proba: 0.2002; nObsNew: 1
  /Users/bbalaji-2012/Documents/Work/DataScience/Kaggle/StateFarm/data/imgs/test/img_27657.jpg:
  Proba:
[ 0.1663  0.0047  0.0958  0.1585  0.0029  0.0111  0.0282  0.1939  0.1383
  0.2002]
  next best class: reaching behind

Output submission

In [132]:
sbmObsNewDf = pd.DataFrame(glbYNewPredProba)
sbmObsNewDf.columns = glbRspClass
sbmObsNewDf['img'] = glbINew
sbmObsNewDf = (sbmObsNewDf
                .set_index(['img'], 
                           drop = False)
                .sort_values('img')
                )
sbmObsNewDf = sbmObsNewDf[['img'] + glbRspClass]
print sbmObsNewDf.head()
print sbmObsNewDf.tail()
                           img            c0            c1            c2  \
img                                                                        
img_1.jpg            img_1.jpg  1.022341e-09  1.291037e-13  1.555788e-06   
img_10.jpg          img_10.jpg  2.402234e-06  5.292969e-06  9.821139e-05   
img_100.jpg        img_100.jpg  8.845692e-01  3.524664e-04  2.565228e-06   
img_1000.jpg      img_1000.jpg  1.225707e-02  1.982485e-02  4.778495e-02   
img_100000.jpg  img_100000.jpg  1.405662e-06  4.824674e-07  2.834371e-07   

                          c3            c4            c5            c6  \
img                                                                      
img_1.jpg       1.410185e-14  3.884833e-10  9.999970e-01  1.474044e-06   
img_10.jpg      2.949705e-01  1.301745e-04  7.047829e-01  2.255957e-09   
img_100.jpg     3.062941e-04  1.676451e-04  3.745919e-07  3.374660e-06   
img_1000.jpg    4.757002e-07  8.205907e-01  3.033170e-05  9.736010e-02   
img_100000.jpg  2.272234e-04  9.805247e-01  7.948852e-08  5.065448e-09   

                          c7            c8            c9  
img                                                       
img_1.jpg       3.129472e-11  1.022849e-11  2.690043e-09  
img_10.jpg      1.052496e-05  6.288445e-08  1.646834e-08  
img_100.jpg     1.175810e-06  2.099598e-03  1.124972e-01  
img_1000.jpg    2.921651e-05  1.652298e-03  4.698370e-04  
img_100000.jpg  5.099652e-08  1.924536e-02  4.479208e-07  
                         img            c0        c1        c2            c3  \
img                                                                            
img_99994.jpg  img_99994.jpg  7.162805e-05  0.000320  0.000009  9.440665e-09   
img_99995.jpg  img_99995.jpg  1.071301e-03  0.016465  0.166696  1.538394e-02   
img_99996.jpg  img_99996.jpg  4.202174e-04  0.000978  0.000348  3.626455e-03   
img_99998.jpg  img_99998.jpg  3.616881e-05  0.000068  0.106104  3.118407e-07   
img_99999.jpg  img_99999.jpg  8.577208e-09  0.368445  0.000013  4.160142e-07   

                     c4        c5        c6            c7        c8  \
img                                                                   
img_99994.jpg  0.012948  0.940612  0.000156  1.051636e-02  0.034079   
img_99995.jpg  0.200807  0.008122  0.061560  4.564922e-03  0.041106   
img_99996.jpg  0.986504  0.000037  0.000722  1.394862e-03  0.005899   
img_99998.jpg  0.001297  0.000041  0.891460  3.005408e-04  0.000338   
img_99999.jpg  0.002124  0.001933  0.371602  5.274902e-12  0.255883   

                         c9  
img                          
img_99994.jpg  1.286955e-03  
img_99995.jpg  4.842241e-01  
img_99996.jpg  7.004915e-05  
img_99998.jpg  3.554169e-04  
img_99999.jpg  2.753189e-07  
In [133]:
sbmObsNewDf.to_csv('img_03_fit_lgtRgrTf_SFDD_sbmt.csv', index = False)
In [134]:
print 'LeaderBoard metric for this submission:%0.5f' % (3.93155)
print 'Best score yet:%s :%0.5f' % \
    ('img_02_fit_lgtRgr_SFDD_sbmt.csv', 2.63892)
LeaderBoard metric for this submission:3.93155
Best score yet:img_02_fit_lgtRgr_SFDD_sbmt.csv :2.63892

Stop here

Following code should be in img04_fit_lgtRgrSGDTf

Let's now switch to stochastic gradient descent training instead, which is much faster.

The graph will be similar, except that instead of holding all the training data into a constant node, we create a Placeholder node which will be fed actual data at every call of sesion.run().

In [6]:
import pandas as pd
models = pd.DataFrame({'nRELUs': [0]})
#models.ix[0, 'accuracy_scoreTest'] = 0
print models
   nRELUs
0       0
In [7]:
batch_size = 128

graph = tf.Graph()
with graph.as_default():

  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tfwObsFitFtr = tf.placeholder(tf.float32,
                                    shape=(batch_size, glbImg['size'] * glbImg['size']))
  tfwObsFitRsp = tf.placeholder(tf.float32, shape=(batch_size, glbRspClassN))
  tfwObsVldFtr = tf.constant(glbXVld)
  tfwObsNewFtr = tf.constant(glbXNew)
  
  # Variables.
  tfwW = tf.Variable(
    tf.truncated_normal([glbImg['size'] * glbImg['size'], glbRspClassN]))
  tfwB = tf.Variable(tf.zeros([glbRspClassN]))
  print(tfwW.initialized_value())
  print(tfwB.initialized_value())    
  
  # Training computation.
  logits = tf.matmul(tfwObsFitFtr, tfwW) + tfwB
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tfwObsFitRsp))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  tfwObsTrnPred = tf.nn.softmax(logits)
  tfwObsVldPred = tf.nn.softmax(
    tf.matmul(tfwObsVldFtr, tfwW) + tfwB)
  tfwObsNewPred = tf.nn.softmax(tf.matmul(tfwObsNewFtr, tfwW) + tfwB)
Tensor("Identity:0", shape=TensorShape([Dimension(784), Dimension(10)]), dtype=float32)
Tensor("Identity_1:0", shape=TensorShape([Dimension(10)]), dtype=float32)

Let's run it:

In [8]:
nStepsTfw = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(nStepsTfw):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (glbYFit.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = glbXFit[offset:(offset + batch_size), :]
    batch_labels = glbYFit[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tfwObsFitFtr : batch_data, tfwObsFitRsp : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, tfwObsTrnPred], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy_score: %.1f%%" % accuracy_score(predictions, batch_labels))
      print("Validation accuracy_score: %.1f%%" % accuracy_score(
        tfwObsVldPred.eval(), glbYVld))
  print("Test accuracy_score: %.1f%%" % accuracy_score(tfwObsNewPred.eval(), glbYNew))
  models.ix[0, 'accuracy_scoreVld'] = accuracy_score(tfwObsVldPred.eval(), glbYVld)
  models.ix[0, 'accuracy_scoreTst'] = accuracy_score( tfwObsNewPred.eval(),  glbYNew)
Initialized
Minibatch loss at step 0: 17.272371
Minibatch accuracy: 6.2%
Validation accuracy: 13.0%
Minibatch loss at step 500: 1.435902
Minibatch accuracy: 76.6%
Validation accuracy: 75.2%
Minibatch loss at step 1000: 1.280029
Minibatch accuracy: 78.1%
Validation accuracy: 77.2%
Minibatch loss at step 1500: 1.147653
Minibatch accuracy: 77.3%
Validation accuracy: 77.2%
Minibatch loss at step 2000: 1.262677
Minibatch accuracy: 72.7%
Validation accuracy: 77.9%
Minibatch loss at step 2500: 0.777248
Minibatch accuracy: 83.6%
Validation accuracy: 77.3%
Minibatch loss at step 3000: 1.085464
Minibatch accuracy: 77.3%
Validation accuracy: 78.6%
Test accuracy: 86.1%
In [9]:
models.ix[0, 'graph'] = graph
print(models)
   nRELUs  accuracyVld  accuracyTst  \
0       0        78.57    86.098056   

                                               graph  
0  <tensorflow.python.framework.ops.Graph object ...  

Problem

Turn the logistic regression example with SGD into a 1-hidden layer neural network with rectified linear units (nn.relu()) and 1024 hidden nodes. This model should improve your validation / test accuracy_score.


In [10]:
nRELUs = [2 ** thsRelu for thsRelu in xrange(11)]
print(nRELUs)
for thsRelu in nRELUs:
    models.ix[thsRelu, 'nRELUs'] = thsRelu

print models
[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
      nRELUs  accuracyVld  accuracyTst  \
0          0        78.57    86.098056   
1          1          NaN          NaN   
2          2          NaN          NaN   
4          4          NaN          NaN   
8          8          NaN          NaN   
16        16          NaN          NaN   
32        32          NaN          NaN   
64        64          NaN          NaN   
128      128          NaN          NaN   
256      256          NaN          NaN   
512      512          NaN          NaN   
1024    1024          NaN          NaN   

                                                  graph  
0     <tensorflow.python.framework.ops.Graph object ...  
1                                                   NaN  
2                                                   NaN  
4                                                   NaN  
8                                                   NaN  
16                                                  NaN  
32                                                  NaN  
64                                                  NaN  
128                                                 NaN  
256                                                 NaN  
512                                                 NaN  
1024                                                NaN  
In [49]:
thsRelu = nRELUs[9]
batch_size = 128

graph = tf.Graph()
with graph.as_default():

  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tfwObsFitFtr = tf.placeholder(tf.float32,
                                    shape=(batch_size, glbImg['size'] * glbImg['size']))
  tfwObsFitRsp = tf.placeholder(tf.float32, shape=(batch_size, glbRspClassN))
  tfwObsVldFtr = tf.constant(glbXVld)
  tfwObsNewFtr = tf.constant(glbXNew)
  
  # Variables.
  tfwW1 = tf.Variable(
    tf.truncated_normal([glbImg['size'] * glbImg['size'], thsRelu]), name = 'tfwW1')
  tfwB1 = tf.Variable(tf.zeros([thsRelu]), name = 'tfwB1')
  tfwW2 = tf.Variable(
    tf.truncated_normal([thsRelu, glbRspClassN]), name = 'tfwW2')
  tfwB2 = tf.Variable(tf.zeros([glbRspClassN]), name = 'tfwB2')
  print(tfwW1.initialized_value())
  print(tfwB1.initialized_value())
  #print(relus.initialized_value())
  print(tfwW2.initialized_value())
  print(tfwB2.initialized_value())
  #tf.Print(relus, [relus])  
     
  # Training computation.
  layer1 = tf.matmul(tfwObsFitFtr, tfwW1) + tfwB1
  layer2 = tf.nn.relu(layer1)
  layer3 = tf.matmul(layer2, tfwW2) + tfwB2
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(layer3, tfwObsFitRsp))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  tfwObsTrnPred = tf.nn.softmax(layer3)
  tfwObsVldPred = tf.nn.softmax(
    tf.matmul(tf.nn.relu(tf.matmul(tfwObsVldFtr, tfwW1) + tfwB1), tfwW2) + tfwB2)
  tfwObsNewPred = tf.nn.softmax(
    tf.matmul(tf.nn.relu(tf.matmul(tfwObsNewFtr, tfwW1) + tfwB1), tfwW2) + tfwB2)
Tensor("Identity:0", shape=TensorShape([Dimension(784), Dimension(512)]), dtype=float32)
Tensor("Identity_1:0", shape=TensorShape([Dimension(512)]), dtype=float32)
Tensor("Identity_2:0", shape=TensorShape([Dimension(512), Dimension(10)]), dtype=float32)
Tensor("Identity_3:0", shape=TensorShape([Dimension(10)]), dtype=float32)
In [50]:
nStepsTfw = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(nStepsTfw):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (glbYFit.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = glbXFit[offset:(offset + batch_size), :]
    batch_labels = glbYFit[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tfwObsFitFtr : batch_data, tfwObsFitRsp : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, tfwObsTrnPred], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy_score: %.1f%%" % accuracy_score(predictions, batch_labels))
      print("Validation accuracy_score: %.1f%%" % accuracy_score(
        tfwObsVldPred.eval(), glbYVld))
  print("Test accuracy_score: %.1f%%" % accuracy_score(tfwObsNewPred.eval(), glbYNew))
  models.ix[thsRelu, 'accuracy_scoreVld'] = accuracy_score(tfwObsVldPred.eval(), glbYVld)
  models.ix[thsRelu, 'accuracy_scoreTst'] = accuracy_score( tfwObsNewPred.eval(),  glbYNew)
  models.ix[thsRelu, 'graph'] = graph
  print(models)
Initialized
Minibatch loss at step 0: 235.251495
Minibatch accuracy: 14.1%
Validation accuracy: 23.9%
Minibatch loss at step 500: 15.635325
Minibatch accuracy: 72.7%
Validation accuracy: 77.7%
Minibatch loss at step 1000: 5.719280
Minibatch accuracy: 83.6%
Validation accuracy: 78.3%
Minibatch loss at step 1500: 3.931793
Minibatch accuracy: 76.6%
Validation accuracy: 75.8%
Minibatch loss at step 2000: 3.211185
Minibatch accuracy: 75.0%
Validation accuracy: 78.1%
Minibatch loss at step 2500: 1.988469
Minibatch accuracy: 80.5%
Validation accuracy: 78.0%
Minibatch loss at step 3000: 3.435107
Minibatch accuracy: 77.3%
Validation accuracy: 79.1%
Test accuracy: 86.7%
      nRELUs  accuracyVld  accuracyTst  \
0          0        78.57    86.098056   
1          1        19.29    19.750053   
2          2        36.26    40.087588   
4          4        64.67    71.122623   
8          8        75.80    82.861568   
16        16        79.74    86.685537   
32        32        76.77    84.287545   
64        64        78.79    86.172826   
128      128        78.86    86.210211   
256      256        79.21    86.407819   
512      512        79.12    86.674856   
1024    1024        81.26    88.335826   

                                                  graph  
0     <tensorflow.python.framework.ops.Graph object ...  
1     <tensorflow.python.framework.ops.Graph object ...  
2     <tensorflow.python.framework.ops.Graph object ...  
4     <tensorflow.python.framework.ops.Graph object ...  
8     <tensorflow.python.framework.ops.Graph object ...  
16    <tensorflow.python.framework.ops.Graph object ...  
32    <tensorflow.python.framework.ops.Graph object ...  
64    <tensorflow.python.framework.ops.Graph object ...  
128   <tensorflow.python.framework.ops.Graph object ...  
256   <tensorflow.python.framework.ops.Graph object ...  
512   <tensorflow.python.framework.ops.Graph object ...  
1024  <tensorflow.python.framework.ops.Graph object ...  
In [52]:
plt.figure()
#plt.plot(models['nRELUs'], models['accuracy_score.fit'], 'bo-', label = 'fit')
plt.plot(models['nRELUs'], models['accuracy_scoreVld'], 'rs-', label = 'vld')
plt.plot(models['nRELUs'], models['accuracy_scoreTst'], 'gp-', label = 'new')
plt.legend(loc = 'lower right')
plt.title("accuracy_score")
plt.xscale('symlog', basex=2)
axes = plt.gca()
axes.set_xlabel('nRELUs')
# axes.set_xlim([mdlDF['l1_penalty'][mdlDF['RSS.vld'].argmin()] / 10 ** 2, \
#                mdlDF['l1_penalty'][mdlDF['RSS.vld'].argmin()] * 10 ** 2])
# axes.set_ylim([0, mdlDF['RSS.vld'].min() * 1.5])
plt.show()
In [ ]: